]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512vlintrin.h
re PR fortran/71880 (pointer to allocatable character)
[thirdparty/gcc.git] / gcc / config / i386 / avx512vlintrin.h
CommitLineData
85ec4feb 1/* Copyright (C) 2014-2018 Free Software Foundation, Inc.
936c0fe4
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLINTRIN_H_INCLUDED
29#define _AVX512VLINTRIN_H_INCLUDED
30
936c0fe4
AI
31#ifndef __AVX512VL__
32#pragma GCC push_options
33#pragma GCC target("avx512vl")
34#define __DISABLE_AVX512VL__
35#endif /* __AVX512VL__ */
36
/* Internal data types for implementing the intrinsics.  __mmask32 is
   declared here as an alias for plain unsigned int.  */
typedef unsigned int __mmask32;
39
40extern __inline __m256d
41__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
42_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
43{
44 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
45 (__v4df) __W,
46 (__mmask8) __U);
47}
48
49extern __inline __m256d
50__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
51_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
52{
53 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
54 (__v4df)
55 _mm256_setzero_pd (),
56 (__mmask8) __U);
57}
58
59extern __inline __m128d
60__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
62{
63 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
64 (__v2df) __W,
65 (__mmask8) __U);
66}
67
68extern __inline __m128d
69__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
71{
72 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
73 (__v2df)
74 _mm_setzero_pd (),
75 (__mmask8) __U);
76}
77
78extern __inline __m256d
79__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
80_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
81{
82 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
83 (__v4df) __W,
84 (__mmask8) __U);
85}
86
87extern __inline __m256d
88__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
89_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
90{
91 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
92 (__v4df)
93 _mm256_setzero_pd (),
94 (__mmask8) __U);
95}
96
97extern __inline __m128d
98__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
99_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
100{
101 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
102 (__v2df) __W,
103 (__mmask8) __U);
104}
105
106extern __inline __m128d
107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108_mm_maskz_load_pd (__mmask8 __U, void const *__P)
109{
110 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
111 (__v2df)
112 _mm_setzero_pd (),
113 (__mmask8) __U);
114}
115
116extern __inline void
117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
118_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
119{
120 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
121 (__v4df) __A,
122 (__mmask8) __U);
123}
124
125extern __inline void
126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
128{
129 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
130 (__v2df) __A,
131 (__mmask8) __U);
132}
133
134extern __inline __m256
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
137{
138 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
139 (__v8sf) __W,
140 (__mmask8) __U);
141}
142
143extern __inline __m256
144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
146{
147 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
148 (__v8sf)
149 _mm256_setzero_ps (),
150 (__mmask8) __U);
151}
152
153extern __inline __m128
154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
156{
157 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
158 (__v4sf) __W,
159 (__mmask8) __U);
160}
161
162extern __inline __m128
163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
165{
166 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
167 (__v4sf)
168 _mm_setzero_ps (),
169 (__mmask8) __U);
170}
171
172extern __inline __m256
173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
175{
176 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
177 (__v8sf) __W,
178 (__mmask8) __U);
179}
180
181extern __inline __m256
182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
184{
185 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
186 (__v8sf)
187 _mm256_setzero_ps (),
188 (__mmask8) __U);
189}
190
191extern __inline __m128
192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
194{
195 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
196 (__v4sf) __W,
197 (__mmask8) __U);
198}
199
200extern __inline __m128
201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202_mm_maskz_load_ps (__mmask8 __U, void const *__P)
203{
204 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
205 (__v4sf)
206 _mm_setzero_ps (),
207 (__mmask8) __U);
208}
209
210extern __inline void
211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
213{
214 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
215 (__v8sf) __A,
216 (__mmask8) __U);
217}
218
219extern __inline void
220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
222{
223 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
224 (__v4sf) __A,
225 (__mmask8) __U);
226}
227
228extern __inline __m256i
229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
231{
232 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
233 (__v4di) __W,
234 (__mmask8) __U);
235}
236
237extern __inline __m256i
238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
240{
241 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
242 (__v4di)
243 _mm256_setzero_si256 (),
244 (__mmask8) __U);
245}
246
247extern __inline __m128i
248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
249_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
250{
251 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
252 (__v2di) __W,
253 (__mmask8) __U);
254}
255
256extern __inline __m128i
257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
259{
260 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
261 (__v2di)
a25a7887 262 _mm_setzero_si128 (),
936c0fe4
AI
263 (__mmask8) __U);
264}
265
266extern __inline __m256i
267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
269{
270 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
271 (__v4di) __W,
272 (__mmask8)
273 __U);
274}
275
276extern __inline __m256i
277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
278_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
279{
280 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
281 (__v4di)
282 _mm256_setzero_si256 (),
283 (__mmask8)
284 __U);
285}
286
287extern __inline __m128i
288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
290{
291 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
292 (__v2di) __W,
293 (__mmask8)
294 __U);
295}
296
297extern __inline __m128i
298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
299_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
300{
301 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
302 (__v2di)
a25a7887 303 _mm_setzero_si128 (),
936c0fe4
AI
304 (__mmask8)
305 __U);
306}
307
308extern __inline void
309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
311{
312 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
313 (__v4di) __A,
314 (__mmask8) __U);
315}
316
317extern __inline void
318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
320{
321 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
322 (__v2di) __A,
323 (__mmask8) __U);
324}
325
326extern __inline __m256i
327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
329{
330 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
331 (__v8si) __W,
332 (__mmask8) __U);
333}
334
335extern __inline __m256i
336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
338{
339 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
340 (__v8si)
341 _mm256_setzero_si256 (),
342 (__mmask8) __U);
343}
344
345extern __inline __m128i
346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
348{
349 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
350 (__v4si) __W,
351 (__mmask8) __U);
352}
353
354extern __inline __m128i
355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
357{
358 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
359 (__v4si)
360 _mm_setzero_si128 (),
361 (__mmask8) __U);
362}
363
364extern __inline __m256i
365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
366_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
367{
368 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
369 (__v8si) __W,
370 (__mmask8)
371 __U);
372}
373
374extern __inline __m256i
375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
376_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
377{
378 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
379 (__v8si)
380 _mm256_setzero_si256 (),
381 (__mmask8)
382 __U);
383}
384
385extern __inline __m128i
386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
388{
389 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
390 (__v4si) __W,
391 (__mmask8)
392 __U);
393}
394
395extern __inline __m128i
396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
398{
399 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
400 (__v4si)
401 _mm_setzero_si128 (),
402 (__mmask8)
403 __U);
404}
405
406extern __inline void
407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
409{
410 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
411 (__v8si) __A,
412 (__mmask8) __U);
413}
414
415extern __inline void
416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
418{
419 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
420 (__v4si) __A,
421 (__mmask8) __U);
422}
423
936c0fe4
AI
424extern __inline __m128d
425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
427{
428 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
429 (__v2df) __B,
430 (__v2df) __W,
431 (__mmask8) __U);
432}
433
434extern __inline __m128d
435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
437{
438 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
439 (__v2df) __B,
440 (__v2df)
441 _mm_setzero_pd (),
442 (__mmask8) __U);
443}
444
445extern __inline __m256d
446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
448 __m256d __B)
449{
450 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
451 (__v4df) __B,
452 (__v4df) __W,
453 (__mmask8) __U);
454}
455
456extern __inline __m256d
457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
459{
460 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
461 (__v4df) __B,
462 (__v4df)
463 _mm256_setzero_pd (),
464 (__mmask8) __U);
465}
466
467extern __inline __m128
468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 469_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
470{
471 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
472 (__v4sf) __B,
473 (__v4sf) __W,
474 (__mmask8) __U);
475}
476
477extern __inline __m128
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 479_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
480{
481 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
482 (__v4sf) __B,
483 (__v4sf)
484 _mm_setzero_ps (),
485 (__mmask8) __U);
486}
487
488extern __inline __m256
489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 490_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
491{
492 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
493 (__v8sf) __B,
494 (__v8sf) __W,
495 (__mmask8) __U);
496}
497
498extern __inline __m256
499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 500_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
501{
502 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
503 (__v8sf) __B,
504 (__v8sf)
505 _mm256_setzero_ps (),
506 (__mmask8) __U);
507}
508
509extern __inline __m128d
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
512{
513 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
514 (__v2df) __B,
515 (__v2df) __W,
516 (__mmask8) __U);
517}
518
519extern __inline __m128d
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
522{
523 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
524 (__v2df) __B,
525 (__v2df)
526 _mm_setzero_pd (),
527 (__mmask8) __U);
528}
529
530extern __inline __m256d
531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
532_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
533 __m256d __B)
534{
535 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
536 (__v4df) __B,
537 (__v4df) __W,
538 (__mmask8) __U);
539}
540
541extern __inline __m256d
542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
544{
545 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
546 (__v4df) __B,
547 (__v4df)
548 _mm256_setzero_pd (),
549 (__mmask8) __U);
550}
551
552extern __inline __m128
553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 554_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
555{
556 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
557 (__v4sf) __B,
558 (__v4sf) __W,
559 (__mmask8) __U);
560}
561
562extern __inline __m128
563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 564_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
565{
566 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
567 (__v4sf) __B,
568 (__v4sf)
569 _mm_setzero_ps (),
570 (__mmask8) __U);
571}
572
573extern __inline __m256
574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 575_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
576{
577 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
578 (__v8sf) __B,
579 (__v8sf) __W,
580 (__mmask8) __U);
581}
582
583extern __inline __m256
584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 585_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
586{
587 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
588 (__v8sf) __B,
589 (__v8sf)
590 _mm256_setzero_ps (),
591 (__mmask8) __U);
592}
593
594extern __inline void
595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596_mm256_store_epi64 (void *__P, __m256i __A)
597{
598 *(__m256i *) __P = __A;
599}
600
601extern __inline void
602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
603_mm_store_epi64 (void *__P, __m128i __A)
604{
605 *(__m128i *) __P = __A;
606}
607
608extern __inline __m256d
609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
610_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
611{
fc9cf6da 612 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
613 (__v4df) __W,
614 (__mmask8) __U);
615}
616
617extern __inline __m256d
618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
619_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
620{
fc9cf6da 621 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
622 (__v4df)
623 _mm256_setzero_pd (),
624 (__mmask8) __U);
625}
626
627extern __inline __m128d
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
630{
fc9cf6da 631 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
632 (__v2df) __W,
633 (__mmask8) __U);
634}
635
636extern __inline __m128d
637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
638_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
639{
fc9cf6da 640 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
641 (__v2df)
642 _mm_setzero_pd (),
643 (__mmask8) __U);
644}
645
646extern __inline void
647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
648_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
649{
fc9cf6da 650 __builtin_ia32_storeupd256_mask ((double *) __P,
936c0fe4
AI
651 (__v4df) __A,
652 (__mmask8) __U);
653}
654
655extern __inline void
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
658{
fc9cf6da 659 __builtin_ia32_storeupd128_mask ((double *) __P,
936c0fe4
AI
660 (__v2df) __A,
661 (__mmask8) __U);
662}
663
664extern __inline __m256
665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
667{
fc9cf6da 668 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
669 (__v8sf) __W,
670 (__mmask8) __U);
671}
672
673extern __inline __m256
674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
676{
fc9cf6da 677 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
678 (__v8sf)
679 _mm256_setzero_ps (),
680 (__mmask8) __U);
681}
682
683extern __inline __m128
684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
686{
fc9cf6da 687 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
688 (__v4sf) __W,
689 (__mmask8) __U);
690}
691
692extern __inline __m128
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
695{
fc9cf6da 696 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
697 (__v4sf)
698 _mm_setzero_ps (),
699 (__mmask8) __U);
700}
701
702extern __inline void
703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
704_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
705{
fc9cf6da 706 __builtin_ia32_storeups256_mask ((float *) __P,
936c0fe4
AI
707 (__v8sf) __A,
708 (__mmask8) __U);
709}
710
711extern __inline void
712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
714{
fc9cf6da 715 __builtin_ia32_storeups128_mask ((float *) __P,
936c0fe4
AI
716 (__v4sf) __A,
717 (__mmask8) __U);
718}
719
720extern __inline __m256i
721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
723{
fc9cf6da 724 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
725 (__v4di) __W,
726 (__mmask8) __U);
727}
728
729extern __inline __m256i
730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
732{
fc9cf6da 733 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
734 (__v4di)
735 _mm256_setzero_si256 (),
736 (__mmask8) __U);
737}
738
739extern __inline __m128i
740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
741_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
742{
fc9cf6da 743 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4
AI
744 (__v2di) __W,
745 (__mmask8) __U);
746}
747
748extern __inline __m128i
749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
750_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
751{
fc9cf6da 752 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4 753 (__v2di)
a25a7887 754 _mm_setzero_si128 (),
936c0fe4
AI
755 (__mmask8) __U);
756}
757
758extern __inline void
759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
761{
fc9cf6da 762 __builtin_ia32_storedqudi256_mask ((long long *) __P,
936c0fe4
AI
763 (__v4di) __A,
764 (__mmask8) __U);
765}
766
767extern __inline void
768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
769_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
770{
fc9cf6da 771 __builtin_ia32_storedqudi128_mask ((long long *) __P,
936c0fe4
AI
772 (__v2di) __A,
773 (__mmask8) __U);
774}
775
776extern __inline __m256i
777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
778_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
779{
fc9cf6da 780 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
781 (__v8si) __W,
782 (__mmask8) __U);
783}
784
785extern __inline __m256i
786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
787_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
788{
fc9cf6da 789 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
790 (__v8si)
791 _mm256_setzero_si256 (),
792 (__mmask8) __U);
793}
794
795extern __inline __m128i
796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
798{
fc9cf6da 799 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
800 (__v4si) __W,
801 (__mmask8) __U);
802}
803
804extern __inline __m128i
805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
806_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
807{
fc9cf6da 808 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
809 (__v4si)
810 _mm_setzero_si128 (),
811 (__mmask8) __U);
812}
813
814extern __inline void
815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
816_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
817{
fc9cf6da 818 __builtin_ia32_storedqusi256_mask ((int *) __P,
936c0fe4
AI
819 (__v8si) __A,
820 (__mmask8) __U);
821}
822
823extern __inline void
824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
826{
fc9cf6da 827 __builtin_ia32_storedqusi128_mask ((int *) __P,
936c0fe4
AI
828 (__v4si) __A,
829 (__mmask8) __U);
830}
831
832extern __inline __m256i
833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
835{
836 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
837 (__v8si) __W,
838 (__mmask8) __U);
839}
840
841extern __inline __m256i
842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
844{
845 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
846 (__v8si)
847 _mm256_setzero_si256 (),
848 (__mmask8) __U);
849}
850
851extern __inline __m128i
852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
854{
855 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
856 (__v4si) __W,
857 (__mmask8) __U);
858}
859
860extern __inline __m128i
861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
862_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
863{
864 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
865 (__v4si)
866 _mm_setzero_si128 (),
867 (__mmask8) __U);
868}
869
870extern __inline __m256i
871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
872_mm256_abs_epi64 (__m256i __A)
873{
874 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
875 (__v4di)
876 _mm256_setzero_si256 (),
877 (__mmask8) -1);
878}
879
880extern __inline __m256i
881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
883{
884 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
885 (__v4di) __W,
886 (__mmask8) __U);
887}
888
889extern __inline __m256i
890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
891_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
892{
893 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
894 (__v4di)
895 _mm256_setzero_si256 (),
896 (__mmask8) __U);
897}
898
899extern __inline __m128i
900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
901_mm_abs_epi64 (__m128i __A)
902{
903 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
904 (__v2di)
a25a7887 905 _mm_setzero_si128 (),
936c0fe4
AI
906 (__mmask8) -1);
907}
908
909extern __inline __m128i
910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
912{
913 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
914 (__v2di) __W,
915 (__mmask8) __U);
916}
917
918extern __inline __m128i
919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
920_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
921{
922 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
923 (__v2di)
a25a7887 924 _mm_setzero_si128 (),
936c0fe4
AI
925 (__mmask8) __U);
926}
927
928extern __inline __m128i
929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
930_mm256_cvtpd_epu32 (__m256d __A)
931{
932 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
933 (__v4si)
934 _mm_setzero_si128 (),
935 (__mmask8) -1);
936}
937
938extern __inline __m128i
939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
941{
942 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
943 (__v4si) __W,
944 (__mmask8) __U);
945}
946
947extern __inline __m128i
948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
949_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
950{
951 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
952 (__v4si)
953 _mm_setzero_si128 (),
954 (__mmask8) __U);
955}
956
957extern __inline __m128i
958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
959_mm_cvtpd_epu32 (__m128d __A)
960{
961 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
962 (__v4si)
963 _mm_setzero_si128 (),
964 (__mmask8) -1);
965}
966
967extern __inline __m128i
968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
970{
971 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
972 (__v4si) __W,
973 (__mmask8) __U);
974}
975
976extern __inline __m128i
977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
979{
980 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
981 (__v4si)
982 _mm_setzero_si128 (),
983 (__mmask8) __U);
984}
985
986extern __inline __m256i
987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
988_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
989{
990 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
991 (__v8si) __W,
992 (__mmask8) __U);
993}
994
995extern __inline __m256i
996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
997_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
998{
999 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1000 (__v8si)
1001 _mm256_setzero_si256 (),
1002 (__mmask8) __U);
1003}
1004
1005extern __inline __m128i
1006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1008{
1009 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1010 (__v4si) __W,
1011 (__mmask8) __U);
1012}
1013
1014extern __inline __m128i
1015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1017{
1018 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1019 (__v4si)
1020 _mm_setzero_si128 (),
1021 (__mmask8) __U);
1022}
1023
1024extern __inline __m256i
1025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026_mm256_cvttps_epu32 (__m256 __A)
1027{
1028 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1029 (__v8si)
1030 _mm256_setzero_si256 (),
1031 (__mmask8) -1);
1032}
1033
1034extern __inline __m256i
1035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1037{
1038 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1039 (__v8si) __W,
1040 (__mmask8) __U);
1041}
1042
1043extern __inline __m256i
1044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1045_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1046{
1047 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1048 (__v8si)
1049 _mm256_setzero_si256 (),
1050 (__mmask8) __U);
1051}
1052
1053extern __inline __m128i
1054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1055_mm_cvttps_epu32 (__m128 __A)
1056{
1057 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1058 (__v4si)
1059 _mm_setzero_si128 (),
1060 (__mmask8) -1);
1061}
1062
1063extern __inline __m128i
1064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1065_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1066{
1067 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1068 (__v4si) __W,
1069 (__mmask8) __U);
1070}
1071
1072extern __inline __m128i
1073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1074_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1075{
1076 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1077 (__v4si)
1078 _mm_setzero_si128 (),
1079 (__mmask8) __U);
1080}
1081
1082extern __inline __m128i
1083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1085{
1086 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1087 (__v4si) __W,
1088 (__mmask8) __U);
1089}
1090
1091extern __inline __m128i
1092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1094{
1095 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1096 (__v4si)
1097 _mm_setzero_si128 (),
1098 (__mmask8) __U);
1099}
1100
1101extern __inline __m128i
1102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1104{
1105 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1106 (__v4si) __W,
1107 (__mmask8) __U);
1108}
1109
1110extern __inline __m128i
1111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1113{
1114 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1115 (__v4si)
1116 _mm_setzero_si128 (),
1117 (__mmask8) __U);
1118}
1119
1120extern __inline __m128i
1121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122_mm256_cvttpd_epu32 (__m256d __A)
1123{
1124 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1125 (__v4si)
1126 _mm_setzero_si128 (),
1127 (__mmask8) -1);
1128}
1129
1130extern __inline __m128i
1131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1132_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1133{
1134 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1135 (__v4si) __W,
1136 (__mmask8) __U);
1137}
1138
1139extern __inline __m128i
1140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1142{
1143 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1144 (__v4si)
1145 _mm_setzero_si128 (),
1146 (__mmask8) __U);
1147}
1148
1149extern __inline __m128i
1150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151_mm_cvttpd_epu32 (__m128d __A)
1152{
1153 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1154 (__v4si)
1155 _mm_setzero_si128 (),
1156 (__mmask8) -1);
1157}
1158
1159extern __inline __m128i
1160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1162{
1163 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1164 (__v4si) __W,
1165 (__mmask8) __U);
1166}
1167
1168extern __inline __m128i
1169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1170_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1171{
1172 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1173 (__v4si)
1174 _mm_setzero_si128 (),
1175 (__mmask8) __U);
1176}
1177
1178extern __inline __m128i
1179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1180_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1181{
1182 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1183 (__v4si) __W,
1184 (__mmask8) __U);
1185}
1186
1187extern __inline __m128i
1188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1189_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1190{
1191 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1192 (__v4si)
1193 _mm_setzero_si128 (),
1194 (__mmask8) __U);
1195}
1196
1197extern __inline __m128i
1198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1199_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1200{
1201 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1202 (__v4si) __W,
1203 (__mmask8) __U);
1204}
1205
1206extern __inline __m128i
1207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1209{
1210 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1211 (__v4si)
1212 _mm_setzero_si128 (),
1213 (__mmask8) __U);
1214}
1215
1216extern __inline __m256d
1217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1219{
1220 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1221 (__v4df) __W,
1222 (__mmask8) __U);
1223}
1224
1225extern __inline __m256d
1226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1228{
1229 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1230 (__v4df)
1231 _mm256_setzero_pd (),
1232 (__mmask8) __U);
1233}
1234
1235extern __inline __m128d
1236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1237_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1238{
1239 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1240 (__v2df) __W,
1241 (__mmask8) __U);
1242}
1243
1244extern __inline __m128d
1245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1246_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1247{
1248 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1249 (__v2df)
1250 _mm_setzero_pd (),
1251 (__mmask8) __U);
1252}
1253
1254extern __inline __m256d
1255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1256_mm256_cvtepu32_pd (__m128i __A)
1257{
1258 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1259 (__v4df)
1260 _mm256_setzero_pd (),
1261 (__mmask8) -1);
1262}
1263
1264extern __inline __m256d
1265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1266_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1267{
1268 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1269 (__v4df) __W,
1270 (__mmask8) __U);
1271}
1272
1273extern __inline __m256d
1274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1275_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1276{
1277 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1278 (__v4df)
1279 _mm256_setzero_pd (),
1280 (__mmask8) __U);
1281}
1282
1283extern __inline __m128d
1284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1285_mm_cvtepu32_pd (__m128i __A)
1286{
1287 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1288 (__v2df)
1289 _mm_setzero_pd (),
1290 (__mmask8) -1);
1291}
1292
1293extern __inline __m128d
1294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1295_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1296{
1297 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1298 (__v2df) __W,
1299 (__mmask8) __U);
1300}
1301
1302extern __inline __m128d
1303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1304_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1305{
1306 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1307 (__v2df)
1308 _mm_setzero_pd (),
1309 (__mmask8) __U);
1310}
1311
1312extern __inline __m256
1313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1314_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1315{
1316 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1317 (__v8sf) __W,
1318 (__mmask8) __U);
1319}
1320
1321extern __inline __m256
1322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1323_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
936c0fe4
AI
1324{
1325 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1326 (__v8sf)
1327 _mm256_setzero_ps (),
1328 (__mmask8) __U);
1329}
1330
1331extern __inline __m128
1332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1334{
1335 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1336 (__v4sf) __W,
1337 (__mmask8) __U);
1338}
1339
1340extern __inline __m128
1341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1342_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
936c0fe4
AI
1343{
1344 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1345 (__v4sf)
1346 _mm_setzero_ps (),
1347 (__mmask8) __U);
1348}
1349
1350extern __inline __m256
1351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352_mm256_cvtepu32_ps (__m256i __A)
1353{
1354 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1355 (__v8sf)
1356 _mm256_setzero_ps (),
1357 (__mmask8) -1);
1358}
1359
1360extern __inline __m256
1361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1363{
1364 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1365 (__v8sf) __W,
1366 (__mmask8) __U);
1367}
1368
1369extern __inline __m256
1370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1371_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1372{
1373 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1374 (__v8sf)
1375 _mm256_setzero_ps (),
1376 (__mmask8) __U);
1377}
1378
1379extern __inline __m128
1380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381_mm_cvtepu32_ps (__m128i __A)
1382{
1383 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1384 (__v4sf)
1385 _mm_setzero_ps (),
1386 (__mmask8) -1);
1387}
1388
1389extern __inline __m128
1390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1392{
1393 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1394 (__v4sf) __W,
1395 (__mmask8) __U);
1396}
1397
1398extern __inline __m128
1399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1400_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1401{
1402 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1403 (__v4sf)
1404 _mm_setzero_ps (),
1405 (__mmask8) __U);
1406}
1407
1408extern __inline __m256d
1409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1410_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1411{
1412 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1413 (__v4df) __W,
1414 (__mmask8) __U);
1415}
1416
1417extern __inline __m256d
1418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1419_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1420{
1421 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1422 (__v4df)
1423 _mm256_setzero_pd (),
1424 (__mmask8) __U);
1425}
1426
1427extern __inline __m128d
1428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1429_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1430{
1431 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1432 (__v2df) __W,
1433 (__mmask8) __U);
1434}
1435
1436extern __inline __m128d
1437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1439{
1440 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1441 (__v2df)
1442 _mm_setzero_pd (),
1443 (__mmask8) __U);
1444}
1445
1446extern __inline __m128i
1447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1448_mm_cvtepi32_epi8 (__m128i __A)
1449{
1450 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
a25a7887
JJ
1451 (__v16qi)
1452 _mm_undefined_si128 (),
936c0fe4
AI
1453 (__mmask8) -1);
1454}
1455
1456extern __inline void
1457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1458_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1459{
1460 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1461}
1462
1463extern __inline __m128i
1464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1466{
1467 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468 (__v16qi) __O, __M);
1469}
1470
1471extern __inline __m128i
1472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1473_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1474{
1475 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1476 (__v16qi)
1477 _mm_setzero_si128 (),
1478 __M);
1479}
1480
1481extern __inline __m128i
1482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1483_mm256_cvtepi32_epi8 (__m256i __A)
1484{
1485 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
a25a7887
JJ
1486 (__v16qi)
1487 _mm_undefined_si128 (),
936c0fe4
AI
1488 (__mmask8) -1);
1489}
1490
1491extern __inline __m128i
1492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1494{
1495 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1496 (__v16qi) __O, __M);
1497}
1498
1499extern __inline void
1500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1502{
1503 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1504}
1505
1506extern __inline __m128i
1507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1509{
1510 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511 (__v16qi)
1512 _mm_setzero_si128 (),
1513 __M);
1514}
1515
1516extern __inline __m128i
1517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1518_mm_cvtsepi32_epi8 (__m128i __A)
1519{
1520 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
a25a7887
JJ
1521 (__v16qi)
1522 _mm_undefined_si128 (),
936c0fe4
AI
1523 (__mmask8) -1);
1524}
1525
1526extern __inline void
1527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1529{
1530 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1531}
1532
1533extern __inline __m128i
1534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1536{
1537 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1538 (__v16qi) __O, __M);
1539}
1540
1541extern __inline __m128i
1542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1543_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1544{
1545 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1546 (__v16qi)
1547 _mm_setzero_si128 (),
1548 __M);
1549}
1550
1551extern __inline __m128i
1552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553_mm256_cvtsepi32_epi8 (__m256i __A)
1554{
1555 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
a25a7887
JJ
1556 (__v16qi)
1557 _mm_undefined_si128 (),
936c0fe4
AI
1558 (__mmask8) -1);
1559}
1560
1561extern __inline void
1562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1563_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1564{
1565 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1566}
1567
1568extern __inline __m128i
1569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1571{
1572 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1573 (__v16qi) __O, __M);
1574}
1575
1576extern __inline __m128i
1577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1578_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1579{
1580 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1581 (__v16qi)
1582 _mm_setzero_si128 (),
1583 __M);
1584}
1585
1586extern __inline __m128i
1587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1588_mm_cvtusepi32_epi8 (__m128i __A)
1589{
1590 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
a25a7887
JJ
1591 (__v16qi)
1592 _mm_undefined_si128 (),
936c0fe4
AI
1593 (__mmask8) -1);
1594}
1595
1596extern __inline void
1597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1598_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1599{
1600 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1601}
1602
1603extern __inline __m128i
1604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1605_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1606{
1607 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1608 (__v16qi) __O,
1609 __M);
1610}
1611
1612extern __inline __m128i
1613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1615{
1616 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1617 (__v16qi)
1618 _mm_setzero_si128 (),
1619 __M);
1620}
1621
1622extern __inline __m128i
1623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1624_mm256_cvtusepi32_epi8 (__m256i __A)
1625{
1626 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
a25a7887
JJ
1627 (__v16qi)
1628 _mm_undefined_si128 (),
936c0fe4
AI
1629 (__mmask8) -1);
1630}
1631
1632extern __inline void
1633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1635{
1636 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1637}
1638
1639extern __inline __m128i
1640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1642{
1643 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1644 (__v16qi) __O,
1645 __M);
1646}
1647
1648extern __inline __m128i
1649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1650_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1651{
1652 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1653 (__v16qi)
1654 _mm_setzero_si128 (),
1655 __M);
1656}
1657
1658extern __inline __m128i
1659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660_mm_cvtepi32_epi16 (__m128i __A)
1661{
1662 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
a25a7887
JJ
1663 (__v8hi)
1664 _mm_setzero_si128 (),
936c0fe4
AI
1665 (__mmask8) -1);
1666}
1667
1668extern __inline void
1669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1670_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1671{
1672 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1673}
1674
1675extern __inline __m128i
1676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1677_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1678{
1679 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1680 (__v8hi) __O, __M);
1681}
1682
1683extern __inline __m128i
1684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1686{
1687 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1688 (__v8hi)
1689 _mm_setzero_si128 (),
1690 __M);
1691}
1692
1693extern __inline __m128i
1694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695_mm256_cvtepi32_epi16 (__m256i __A)
1696{
1697 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
a25a7887
JJ
1698 (__v8hi)
1699 _mm_setzero_si128 (),
936c0fe4
AI
1700 (__mmask8) -1);
1701}
1702
9ab4c07a 1703extern __inline void
936c0fe4
AI
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1706{
1707 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1708}
1709
1710extern __inline __m128i
1711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1712_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1713{
1714 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1715 (__v8hi) __O, __M);
1716}
1717
1718extern __inline __m128i
1719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1721{
1722 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1723 (__v8hi)
1724 _mm_setzero_si128 (),
1725 __M);
1726}
1727
1728extern __inline __m128i
1729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730_mm_cvtsepi32_epi16 (__m128i __A)
1731{
1732 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
a25a7887
JJ
1733 (__v8hi)
1734 _mm_setzero_si128 (),
936c0fe4
AI
1735 (__mmask8) -1);
1736}
1737
1738extern __inline void
1739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1741{
1742 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1743}
1744
1745extern __inline __m128i
1746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1747_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1748{
1749 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1750 (__v8hi)__O,
1751 __M);
1752}
1753
1754extern __inline __m128i
1755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1756_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1757{
1758 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1759 (__v8hi)
1760 _mm_setzero_si128 (),
1761 __M);
1762}
1763
1764extern __inline __m128i
1765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1766_mm256_cvtsepi32_epi16 (__m256i __A)
1767{
1768 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
a25a7887
JJ
1769 (__v8hi)
1770 _mm_undefined_si128 (),
936c0fe4
AI
1771 (__mmask8) -1);
1772}
1773
1774extern __inline void
1775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1776_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1777{
1778 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1779}
1780
1781extern __inline __m128i
1782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1784{
1785 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1786 (__v8hi) __O, __M);
1787}
1788
1789extern __inline __m128i
1790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1791_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1792{
1793 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1794 (__v8hi)
1795 _mm_setzero_si128 (),
1796 __M);
1797}
1798
1799extern __inline __m128i
1800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801_mm_cvtusepi32_epi16 (__m128i __A)
1802{
1803 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
a25a7887
JJ
1804 (__v8hi)
1805 _mm_undefined_si128 (),
936c0fe4
AI
1806 (__mmask8) -1);
1807}
1808
9ab4c07a 1809extern __inline void
936c0fe4
AI
1810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1812{
1813 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1814}
1815
1816extern __inline __m128i
1817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1819{
1820 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1821 (__v8hi) __O, __M);
1822}
1823
1824extern __inline __m128i
1825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1826_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1827{
1828 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1829 (__v8hi)
1830 _mm_setzero_si128 (),
1831 __M);
1832}
1833
1834extern __inline __m128i
1835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1836_mm256_cvtusepi32_epi16 (__m256i __A)
1837{
1838 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
a25a7887
JJ
1839 (__v8hi)
1840 _mm_undefined_si128 (),
936c0fe4
AI
1841 (__mmask8) -1);
1842}
1843
1844extern __inline void
1845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1847{
1848 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1849}
1850
1851extern __inline __m128i
1852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1853_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1854{
1855 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1856 (__v8hi) __O, __M);
1857}
1858
1859extern __inline __m128i
1860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1861_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1862{
1863 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1864 (__v8hi)
1865 _mm_setzero_si128 (),
1866 __M);
1867}
1868
1869extern __inline __m128i
1870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1871_mm_cvtepi64_epi8 (__m128i __A)
1872{
1873 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
a25a7887
JJ
1874 (__v16qi)
1875 _mm_undefined_si128 (),
936c0fe4
AI
1876 (__mmask8) -1);
1877}
1878
1879extern __inline void
1880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1881_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1882{
1883 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1884}
1885
1886extern __inline __m128i
1887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1888_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1889{
1890 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1891 (__v16qi) __O, __M);
1892}
1893
1894extern __inline __m128i
1895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1896_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1897{
1898 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1899 (__v16qi)
1900 _mm_setzero_si128 (),
1901 __M);
1902}
1903
1904extern __inline __m128i
1905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906_mm256_cvtepi64_epi8 (__m256i __A)
1907{
1908 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
a25a7887
JJ
1909 (__v16qi)
1910 _mm_undefined_si128 (),
936c0fe4
AI
1911 (__mmask8) -1);
1912}
1913
1914extern __inline void
1915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1916_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1917{
1918 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1919}
1920
1921extern __inline __m128i
1922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1924{
1925 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1926 (__v16qi) __O, __M);
1927}
1928
1929extern __inline __m128i
1930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1932{
1933 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1934 (__v16qi)
1935 _mm_setzero_si128 (),
1936 __M);
1937}
1938
1939extern __inline __m128i
1940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941_mm_cvtsepi64_epi8 (__m128i __A)
1942{
1943 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
a25a7887
JJ
1944 (__v16qi)
1945 _mm_undefined_si128 (),
936c0fe4
AI
1946 (__mmask8) -1);
1947}
1948
1949extern __inline void
1950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1952{
1953 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1954}
1955
1956extern __inline __m128i
1957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1958_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1959{
1960 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1961 (__v16qi) __O, __M);
1962}
1963
1964extern __inline __m128i
1965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1967{
1968 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1969 (__v16qi)
1970 _mm_setzero_si128 (),
1971 __M);
1972}
1973
1974extern __inline __m128i
1975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1976_mm256_cvtsepi64_epi8 (__m256i __A)
1977{
1978 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
a25a7887
JJ
1979 (__v16qi)
1980 _mm_undefined_si128 (),
936c0fe4
AI
1981 (__mmask8) -1);
1982}
1983
1984extern __inline void
1985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1987{
1988 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1989}
1990
1991extern __inline __m128i
1992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1993_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1994{
1995 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1996 (__v16qi) __O, __M);
1997}
1998
1999extern __inline __m128i
2000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2002{
2003 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2004 (__v16qi)
2005 _mm_setzero_si128 (),
2006 __M);
2007}
2008
2009extern __inline __m128i
2010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011_mm_cvtusepi64_epi8 (__m128i __A)
2012{
2013 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
a25a7887
JJ
2014 (__v16qi)
2015 _mm_undefined_si128 (),
936c0fe4
AI
2016 (__mmask8) -1);
2017}
2018
2019extern __inline void
2020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2022{
2023 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2024}
2025
2026extern __inline __m128i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2029{
2030 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031 (__v16qi) __O,
2032 __M);
2033}
2034
2035extern __inline __m128i
2036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2038{
2039 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040 (__v16qi)
2041 _mm_setzero_si128 (),
2042 __M);
2043}
2044
2045extern __inline __m128i
2046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047_mm256_cvtusepi64_epi8 (__m256i __A)
2048{
2049 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
a25a7887
JJ
2050 (__v16qi)
2051 _mm_undefined_si128 (),
936c0fe4
AI
2052 (__mmask8) -1);
2053}
2054
2055extern __inline void
2056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2058{
2059 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2060}
2061
2062extern __inline __m128i
2063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2065{
2066 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2067 (__v16qi) __O,
2068 __M);
2069}
2070
2071extern __inline __m128i
2072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2074{
2075 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2076 (__v16qi)
2077 _mm_setzero_si128 (),
2078 __M);
2079}
2080
2081extern __inline __m128i
2082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2083_mm_cvtepi64_epi16 (__m128i __A)
2084{
2085 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
a25a7887
JJ
2086 (__v8hi)
2087 _mm_undefined_si128 (),
936c0fe4
AI
2088 (__mmask8) -1);
2089}
2090
2091extern __inline void
2092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2093_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2094{
2095 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2096}
2097
2098extern __inline __m128i
2099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2100_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2101{
2102 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2103 (__v8hi)__O,
2104 __M);
2105}
2106
2107extern __inline __m128i
2108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2110{
2111 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2112 (__v8hi)
2113 _mm_setzero_si128 (),
2114 __M);
2115}
2116
2117extern __inline __m128i
2118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2119_mm256_cvtepi64_epi16 (__m256i __A)
2120{
2121 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
a25a7887
JJ
2122 (__v8hi)
2123 _mm_undefined_si128 (),
936c0fe4
AI
2124 (__mmask8) -1);
2125}
2126
2127extern __inline void
2128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2129_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2130{
2131 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2132}
2133
2134extern __inline __m128i
2135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2136_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2137{
2138 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2139 (__v8hi) __O, __M);
2140}
2141
2142extern __inline __m128i
2143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2145{
2146 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2147 (__v8hi)
2148 _mm_setzero_si128 (),
2149 __M);
2150}
2151
2152extern __inline __m128i
2153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154_mm_cvtsepi64_epi16 (__m128i __A)
2155{
2156 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
a25a7887
JJ
2157 (__v8hi)
2158 _mm_undefined_si128 (),
936c0fe4
AI
2159 (__mmask8) -1);
2160}
2161
2162extern __inline void
2163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2164_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2165{
2166 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2167}
2168
2169extern __inline __m128i
2170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2171_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2172{
2173 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2174 (__v8hi) __O, __M);
2175}
2176
2177extern __inline __m128i
2178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2180{
2181 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2182 (__v8hi)
2183 _mm_setzero_si128 (),
2184 __M);
2185}
2186
2187extern __inline __m128i
2188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189_mm256_cvtsepi64_epi16 (__m256i __A)
2190{
2191 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
a25a7887
JJ
2192 (__v8hi)
2193 _mm_undefined_si128 (),
936c0fe4
AI
2194 (__mmask8) -1);
2195}
2196
2197extern __inline void
2198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2199_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2200{
2201 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2202}
2203
2204extern __inline __m128i
2205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2206_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2207{
2208 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2209 (__v8hi) __O, __M);
2210}
2211
2212extern __inline __m128i
2213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2214_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2215{
2216 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2217 (__v8hi)
2218 _mm_setzero_si128 (),
2219 __M);
2220}
2221
2222extern __inline __m128i
2223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2224_mm_cvtusepi64_epi16 (__m128i __A)
2225{
2226 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
a25a7887
JJ
2227 (__v8hi)
2228 _mm_undefined_si128 (),
936c0fe4
AI
2229 (__mmask8) -1);
2230}
2231
2232extern __inline void
2233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2234_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2235{
2236 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2237}
2238
2239extern __inline __m128i
2240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2242{
2243 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2244 (__v8hi) __O, __M);
2245}
2246
2247extern __inline __m128i
2248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2249_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2250{
2251 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2252 (__v8hi)
2253 _mm_setzero_si128 (),
2254 __M);
2255}
2256
2257extern __inline __m128i
2258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2259_mm256_cvtusepi64_epi16 (__m256i __A)
2260{
2261 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
a25a7887
JJ
2262 (__v8hi)
2263 _mm_undefined_si128 (),
936c0fe4
AI
2264 (__mmask8) -1);
2265}
2266
2267extern __inline void
2268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2270{
2271 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2272}
2273
2274extern __inline __m128i
2275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2277{
2278 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2279 (__v8hi) __O, __M);
2280}
2281
2282extern __inline __m128i
2283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2284_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2285{
2286 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2287 (__v8hi)
2288 _mm_setzero_si128 (),
2289 __M);
2290}
2291
2292extern __inline __m128i
2293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2294_mm_cvtepi64_epi32 (__m128i __A)
2295{
2296 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
a25a7887
JJ
2297 (__v4si)
2298 _mm_undefined_si128 (),
936c0fe4
AI
2299 (__mmask8) -1);
2300}
2301
2302extern __inline void
2303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2304_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2305{
2306 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2307}
2308
2309extern __inline __m128i
2310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2312{
2313 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314 (__v4si) __O, __M);
2315}
2316
2317extern __inline __m128i
2318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2319_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2320{
2321 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2322 (__v4si)
2323 _mm_setzero_si128 (),
2324 __M);
2325}
2326
2327extern __inline __m128i
2328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329_mm256_cvtepi64_epi32 (__m256i __A)
2330{
2331 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
a25a7887
JJ
2332 (__v4si)
2333 _mm_undefined_si128 (),
936c0fe4
AI
2334 (__mmask8) -1);
2335}
2336
2337extern __inline void
2338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2339_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2340{
2341 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2342}
2343
2344extern __inline __m128i
2345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2346_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2347{
2348 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2349 (__v4si) __O, __M);
2350}
2351
2352extern __inline __m128i
2353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2355{
2356 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2357 (__v4si)
2358 _mm_setzero_si128 (),
2359 __M);
2360}
2361
2362extern __inline __m128i
2363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364_mm_cvtsepi64_epi32 (__m128i __A)
2365{
2366 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
a25a7887
JJ
2367 (__v4si)
2368 _mm_undefined_si128 (),
936c0fe4
AI
2369 (__mmask8) -1);
2370}
2371
9ab4c07a 2372extern __inline void
936c0fe4
AI
2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2375{
2376 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2377}
2378
2379extern __inline __m128i
2380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2381_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2382{
2383 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2384 (__v4si) __O, __M);
2385}
2386
2387extern __inline __m128i
2388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2390{
2391 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2392 (__v4si)
2393 _mm_setzero_si128 (),
2394 __M);
2395}
2396
2397extern __inline __m128i
2398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399_mm256_cvtsepi64_epi32 (__m256i __A)
2400{
2401 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
a25a7887
JJ
2402 (__v4si)
2403 _mm_undefined_si128 (),
936c0fe4
AI
2404 (__mmask8) -1);
2405}
2406
2407extern __inline void
2408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2409_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2410{
2411 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2412}
2413
2414extern __inline __m128i
2415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2416_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2417{
2418 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2419 (__v4si)__O,
2420 __M);
2421}
2422
2423extern __inline __m128i
2424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2426{
2427 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2428 (__v4si)
2429 _mm_setzero_si128 (),
2430 __M);
2431}
2432
2433extern __inline __m128i
2434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2435_mm_cvtusepi64_epi32 (__m128i __A)
2436{
2437 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
a25a7887
JJ
2438 (__v4si)
2439 _mm_undefined_si128 (),
936c0fe4
AI
2440 (__mmask8) -1);
2441}
2442
2443extern __inline void
2444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2445_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2446{
2447 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2448}
2449
2450extern __inline __m128i
2451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2453{
2454 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2455 (__v4si) __O, __M);
2456}
2457
2458extern __inline __m128i
2459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2460_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2461{
2462 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2463 (__v4si)
2464 _mm_setzero_si128 (),
2465 __M);
2466}
2467
2468extern __inline __m128i
2469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470_mm256_cvtusepi64_epi32 (__m256i __A)
2471{
2472 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
a25a7887
JJ
2473 (__v4si)
2474 _mm_undefined_si128 (),
936c0fe4
AI
2475 (__mmask8) -1);
2476}
2477
2478extern __inline void
2479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2480_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2481{
2482 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2483}
2484
2485extern __inline __m128i
2486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2487_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2488{
2489 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2490 (__v4si) __O, __M);
2491}
2492
2493extern __inline __m128i
2494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2496{
2497 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2498 (__v4si)
2499 _mm_setzero_si128 (),
2500 __M);
2501}
2502
2503extern __inline __m256
2504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2505_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2506{
2507 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2508 (__v8sf) __O,
2509 __M);
2510}
2511
2512extern __inline __m256
2513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2514_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2515{
2516 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2517 (__v8sf)
2518 _mm256_setzero_ps (),
2519 __M);
2520}
2521
2522extern __inline __m128
2523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2524_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2525{
2526 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2527 (__v4sf) __O,
2528 __M);
2529}
2530
2531extern __inline __m128
2532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2534{
2535 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2536 (__v4sf)
2537 _mm_setzero_ps (),
2538 __M);
2539}
2540
2541extern __inline __m256d
2542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2543_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2544{
2545 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2546 (__v4df) __O,
2547 __M);
2548}
2549
2550extern __inline __m256d
2551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2553{
2554 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2555 (__v4df)
2556 _mm256_setzero_pd (),
2557 __M);
2558}
2559
2560extern __inline __m256i
2561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2563{
2564 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2565 (__v8si) __O,
2566 __M);
2567}
2568
2569extern __inline __m256i
2570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2572{
2573 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2574 (__v8si)
2575 _mm256_setzero_si256 (),
2576 __M);
2577}
2578
2579extern __inline __m256i
2580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2581_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2582{
2583 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2584 __M);
2585}
2586
2587extern __inline __m256i
2588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2589_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2590{
2591 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2592 (__v8si)
2593 _mm256_setzero_si256 (),
2594 __M);
2595}
2596
2597extern __inline __m128i
2598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2599_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2600{
2601 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2602 (__v4si) __O,
2603 __M);
2604}
2605
2606extern __inline __m128i
2607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2609{
2610 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2611 (__v4si)
2612 _mm_setzero_si128 (),
2613 __M);
2614}
2615
2616extern __inline __m128i
2617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2618_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2619{
2620 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2621 __M);
2622}
2623
2624extern __inline __m128i
2625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2626_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2627{
a25a7887
JJ
2628 return (__m128i)
2629 __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2630 (__v4si) _mm_setzero_si128 (),
2631 __M);
936c0fe4
AI
2632}
2633
2634extern __inline __m256i
2635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2636_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2637{
2638 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2639 (__v4di) __O,
2640 __M);
2641}
2642
2643extern __inline __m256i
2644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2645_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2646{
2647 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2648 (__v4di)
2649 _mm256_setzero_si256 (),
2650 __M);
2651}
2652
2653extern __inline __m256i
2654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2656{
936c0fe4
AI
2657 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2658 __M);
936c0fe4
AI
2659}
2660
2661extern __inline __m256i
2662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2663_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2664{
936c0fe4
AI
2665 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2666 (__v4di)
2667 _mm256_setzero_si256 (),
2668 __M);
936c0fe4
AI
2669}
2670
2671extern __inline __m128i
2672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2674{
2675 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2676 (__v2di) __O,
2677 __M);
2678}
2679
2680extern __inline __m128i
2681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2682_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2683{
2684 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2685 (__v2di)
2686 _mm_setzero_si128 (),
2687 __M);
2688}
2689
2690extern __inline __m128i
2691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2693{
936c0fe4
AI
2694 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2695 __M);
936c0fe4
AI
2696}
2697
2698extern __inline __m128i
2699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2700_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2701{
a25a7887
JJ
2702 return (__m128i)
2703 __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2704 (__v2di) _mm_setzero_si128 (),
2705 __M);
936c0fe4
AI
2706}
2707
2708extern __inline __m256
2709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710_mm256_broadcast_f32x4 (__m128 __A)
2711{
2712 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2713 (__v8sf)_mm256_undefined_pd (),
c42b0bdf 2714 (__mmask8) -1);
936c0fe4
AI
2715}
2716
2717extern __inline __m256
2718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2719_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2720{
2721 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2722 (__v8sf) __O,
2723 __M);
2724}
2725
2726extern __inline __m256
2727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2728_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2729{
2730 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2731 (__v8sf)
2732 _mm256_setzero_ps (),
2733 __M);
2734}
2735
2736extern __inline __m256i
2737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2738_mm256_broadcast_i32x4 (__m128i __A)
2739{
2740 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2741 __A,
2742 (__v8si)_mm256_undefined_si256 (),
c42b0bdf 2743 (__mmask8) -1);
936c0fe4
AI
2744}
2745
2746extern __inline __m256i
2747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2748_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2749{
2750 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2751 __A,
2752 (__v8si)
2753 __O, __M);
2754}
2755
2756extern __inline __m256i
2757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2759{
2760 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2761 __A,
2762 (__v8si)
2763 _mm256_setzero_si256 (),
2764 __M);
2765}
2766
2767extern __inline __m256i
2768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2769_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2770{
2771 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2772 (__v8si) __W,
2773 (__mmask8) __U);
2774}
2775
2776extern __inline __m256i
2777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2778_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2779{
2780 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2781 (__v8si)
2782 _mm256_setzero_si256 (),
2783 (__mmask8) __U);
2784}
2785
2786extern __inline __m128i
2787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2789{
2790 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2791 (__v4si) __W,
2792 (__mmask8) __U);
2793}
2794
2795extern __inline __m128i
2796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2798{
2799 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2800 (__v4si)
2801 _mm_setzero_si128 (),
2802 (__mmask8) __U);
2803}
2804
2805extern __inline __m256i
2806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2808{
2809 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2810 (__v4di) __W,
2811 (__mmask8) __U);
2812}
2813
2814extern __inline __m256i
2815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2817{
2818 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2819 (__v4di)
2820 _mm256_setzero_si256 (),
2821 (__mmask8) __U);
2822}
2823
2824extern __inline __m128i
2825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2826_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2827{
2828 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2829 (__v2di) __W,
2830 (__mmask8) __U);
2831}
2832
2833extern __inline __m128i
2834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2836{
2837 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2838 (__v2di)
2839 _mm_setzero_si128 (),
2840 (__mmask8) __U);
2841}
2842
2843extern __inline __m256i
2844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2846{
2847 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2848 (__v8si) __W,
2849 (__mmask8) __U);
2850}
2851
2852extern __inline __m256i
2853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2855{
2856 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2857 (__v8si)
2858 _mm256_setzero_si256 (),
2859 (__mmask8) __U);
2860}
2861
2862extern __inline __m128i
2863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2864_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2865{
2866 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2867 (__v4si) __W,
2868 (__mmask8) __U);
2869}
2870
2871extern __inline __m128i
2872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2873_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2874{
2875 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2876 (__v4si)
2877 _mm_setzero_si128 (),
2878 (__mmask8) __U);
2879}
2880
2881extern __inline __m256i
2882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2884{
2885 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2886 (__v4di) __W,
2887 (__mmask8) __U);
2888}
2889
2890extern __inline __m256i
2891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2892_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2893{
2894 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2895 (__v4di)
2896 _mm256_setzero_si256 (),
2897 (__mmask8) __U);
2898}
2899
2900extern __inline __m128i
2901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2902_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2903{
2904 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2905 (__v2di) __W,
2906 (__mmask8) __U);
2907}
2908
2909extern __inline __m128i
2910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2911_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2912{
2913 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2914 (__v2di)
2915 _mm_setzero_si128 (),
2916 (__mmask8) __U);
2917}
2918
2919extern __inline __m256i
2920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2921_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2922{
2923 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2924 (__v4di) __W,
2925 (__mmask8) __U);
2926}
2927
2928extern __inline __m256i
2929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2931{
2932 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2933 (__v4di)
2934 _mm256_setzero_si256 (),
2935 (__mmask8) __U);
2936}
2937
2938extern __inline __m128i
2939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2940_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2941{
2942 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2943 (__v2di) __W,
2944 (__mmask8) __U);
2945}
2946
2947extern __inline __m128i
2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2950{
2951 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2952 (__v2di)
2953 _mm_setzero_si128 (),
2954 (__mmask8) __U);
2955}
2956
2957extern __inline __m256i
2958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2959_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2960{
2961 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2962 (__v8si) __W,
2963 (__mmask8) __U);
2964}
2965
2966extern __inline __m256i
2967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2968_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2969{
2970 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2971 (__v8si)
2972 _mm256_setzero_si256 (),
2973 (__mmask8) __U);
2974}
2975
2976extern __inline __m128i
2977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2978_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2979{
2980 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2981 (__v4si) __W,
2982 (__mmask8) __U);
2983}
2984
2985extern __inline __m128i
2986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2987_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2988{
2989 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2990 (__v4si)
2991 _mm_setzero_si128 (),
2992 (__mmask8) __U);
2993}
2994
2995extern __inline __m256i
2996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2997_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2998{
2999 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3000 (__v4di) __W,
3001 (__mmask8) __U);
3002}
3003
3004extern __inline __m256i
3005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3007{
3008 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3009 (__v4di)
3010 _mm256_setzero_si256 (),
3011 (__mmask8) __U);
3012}
3013
3014extern __inline __m128i
3015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3016_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3017{
3018 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3019 (__v2di) __W,
3020 (__mmask8) __U);
3021}
3022
3023extern __inline __m128i
3024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3025_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3026{
3027 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3028 (__v2di)
3029 _mm_setzero_si128 (),
3030 (__mmask8) __U);
3031}
3032
3033extern __inline __m256i
3034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3036{
3037 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3038 (__v8si) __W,
3039 (__mmask8) __U);
3040}
3041
3042extern __inline __m256i
3043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3045{
3046 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3047 (__v8si)
3048 _mm256_setzero_si256 (),
3049 (__mmask8) __U);
3050}
3051
3052extern __inline __m128i
3053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3054_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3055{
3056 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3057 (__v4si) __W,
3058 (__mmask8) __U);
3059}
3060
3061extern __inline __m128i
3062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3063_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3064{
3065 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3066 (__v4si)
3067 _mm_setzero_si128 (),
3068 (__mmask8) __U);
3069}
3070
3071extern __inline __m256i
3072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3073_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3074{
3075 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3076 (__v4di) __W,
3077 (__mmask8) __U);
3078}
3079
3080extern __inline __m256i
3081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3082_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3083{
3084 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3085 (__v4di)
3086 _mm256_setzero_si256 (),
3087 (__mmask8) __U);
3088}
3089
3090extern __inline __m128i
3091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3092_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3093{
3094 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3095 (__v2di) __W,
3096 (__mmask8) __U);
3097}
3098
3099extern __inline __m128i
3100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3101_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3102{
3103 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3104 (__v2di)
3105 _mm_setzero_si128 (),
3106 (__mmask8) __U);
3107}
3108
3109extern __inline __m256i
3110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3111_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3112{
3113 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3114 (__v4di) __W,
3115 (__mmask8) __U);
3116}
3117
3118extern __inline __m256i
3119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3120_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3121{
3122 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3123 (__v4di)
3124 _mm256_setzero_si256 (),
3125 (__mmask8) __U);
3126}
3127
3128extern __inline __m128i
3129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3130_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3131{
3132 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3133 (__v2di) __W,
3134 (__mmask8) __U);
3135}
3136
3137extern __inline __m128i
3138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3139_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3140{
3141 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3142 (__v2di)
3143 _mm_setzero_si128 (),
3144 (__mmask8) __U);
3145}
3146
3147extern __inline __m256d
3148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3149_mm256_rcp14_pd (__m256d __A)
3150{
3151 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3152 (__v4df)
3153 _mm256_setzero_pd (),
3154 (__mmask8) -1);
3155}
3156
3157extern __inline __m256d
3158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3159_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3160{
3161 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3162 (__v4df) __W,
3163 (__mmask8) __U);
3164}
3165
3166extern __inline __m256d
3167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3168_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3169{
3170 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3171 (__v4df)
3172 _mm256_setzero_pd (),
3173 (__mmask8) __U);
3174}
3175
3176extern __inline __m128d
3177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3178_mm_rcp14_pd (__m128d __A)
3179{
3180 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3181 (__v2df)
3182 _mm_setzero_pd (),
3183 (__mmask8) -1);
3184}
3185
3186extern __inline __m128d
3187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3188_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3189{
3190 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3191 (__v2df) __W,
3192 (__mmask8) __U);
3193}
3194
3195extern __inline __m128d
3196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3197_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3198{
3199 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3200 (__v2df)
3201 _mm_setzero_pd (),
3202 (__mmask8) __U);
3203}
3204
3205extern __inline __m256
3206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207_mm256_rcp14_ps (__m256 __A)
3208{
3209 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3210 (__v8sf)
3211 _mm256_setzero_ps (),
3212 (__mmask8) -1);
3213}
3214
3215extern __inline __m256
3216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3217_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3218{
3219 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3220 (__v8sf) __W,
3221 (__mmask8) __U);
3222}
3223
3224extern __inline __m256
3225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3226_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3227{
3228 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3229 (__v8sf)
3230 _mm256_setzero_ps (),
3231 (__mmask8) __U);
3232}
3233
3234extern __inline __m128
3235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3236_mm_rcp14_ps (__m128 __A)
3237{
3238 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3239 (__v4sf)
3240 _mm_setzero_ps (),
3241 (__mmask8) -1);
3242}
3243
3244extern __inline __m128
3245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3246_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3247{
3248 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3249 (__v4sf) __W,
3250 (__mmask8) __U);
3251}
3252
3253extern __inline __m128
3254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3255_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3256{
3257 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3258 (__v4sf)
3259 _mm_setzero_ps (),
3260 (__mmask8) __U);
3261}
3262
3263extern __inline __m256d
3264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3265_mm256_rsqrt14_pd (__m256d __A)
3266{
3267 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3268 (__v4df)
3269 _mm256_setzero_pd (),
3270 (__mmask8) -1);
3271}
3272
3273extern __inline __m256d
3274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3276{
3277 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3278 (__v4df) __W,
3279 (__mmask8) __U);
3280}
3281
3282extern __inline __m256d
3283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3284_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3285{
3286 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3287 (__v4df)
3288 _mm256_setzero_pd (),
3289 (__mmask8) __U);
3290}
3291
3292extern __inline __m128d
3293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3294_mm_rsqrt14_pd (__m128d __A)
3295{
3296 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3297 (__v2df)
3298 _mm_setzero_pd (),
3299 (__mmask8) -1);
3300}
3301
3302extern __inline __m128d
3303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3304_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3305{
3306 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3307 (__v2df) __W,
3308 (__mmask8) __U);
3309}
3310
3311extern __inline __m128d
3312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3313_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3314{
3315 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3316 (__v2df)
3317 _mm_setzero_pd (),
3318 (__mmask8) __U);
3319}
3320
3321extern __inline __m256
3322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3323_mm256_rsqrt14_ps (__m256 __A)
3324{
3325 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3326 (__v8sf)
3327 _mm256_setzero_ps (),
3328 (__mmask8) -1);
3329}
3330
3331extern __inline __m256
3332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3333_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3334{
3335 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3336 (__v8sf) __W,
3337 (__mmask8) __U);
3338}
3339
3340extern __inline __m256
3341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3343{
3344 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3345 (__v8sf)
3346 _mm256_setzero_ps (),
3347 (__mmask8) __U);
3348}
3349
3350extern __inline __m128
3351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352_mm_rsqrt14_ps (__m128 __A)
3353{
3354 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3355 (__v4sf)
3356 _mm_setzero_ps (),
3357 (__mmask8) -1);
3358}
3359
3360extern __inline __m128
3361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3362_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3363{
3364 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3365 (__v4sf) __W,
3366 (__mmask8) __U);
3367}
3368
3369extern __inline __m128
3370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3371_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3372{
3373 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3374 (__v4sf)
3375 _mm_setzero_ps (),
3376 (__mmask8) __U);
3377}
3378
3379extern __inline __m256d
3380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3381_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3382{
3383 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3384 (__v4df) __W,
3385 (__mmask8) __U);
3386}
3387
3388extern __inline __m256d
3389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3390_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3391{
3392 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3393 (__v4df)
3394 _mm256_setzero_pd (),
3395 (__mmask8) __U);
3396}
3397
3398extern __inline __m128d
3399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3400_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3401{
3402 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3403 (__v2df) __W,
3404 (__mmask8) __U);
3405}
3406
3407extern __inline __m128d
3408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3410{
3411 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3412 (__v2df)
3413 _mm_setzero_pd (),
3414 (__mmask8) __U);
3415}
3416
3417extern __inline __m256
3418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3419_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3420{
3421 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3422 (__v8sf) __W,
3423 (__mmask8) __U);
3424}
3425
3426extern __inline __m256
3427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3429{
3430 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3431 (__v8sf)
3432 _mm256_setzero_ps (),
3433 (__mmask8) __U);
3434}
3435
3436extern __inline __m128
3437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3439{
3440 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3441 (__v4sf) __W,
3442 (__mmask8) __U);
3443}
3444
3445extern __inline __m128
3446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3448{
3449 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3450 (__v4sf)
3451 _mm_setzero_ps (),
3452 (__mmask8) __U);
3453}
3454
3455extern __inline __m256i
3456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3458 __m256i __B)
3459{
3460 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3461 (__v8si) __B,
3462 (__v8si) __W,
3463 (__mmask8) __U);
3464}
3465
3466extern __inline __m256i
3467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3469{
3470 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3471 (__v8si) __B,
3472 (__v8si)
3473 _mm256_setzero_si256 (),
3474 (__mmask8) __U);
3475}
3476
3477extern __inline __m256i
3478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3480 __m256i __B)
3481{
3482 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3483 (__v4di) __B,
3484 (__v4di) __W,
3485 (__mmask8) __U);
3486}
3487
3488extern __inline __m256i
3489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3491{
3492 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3493 (__v4di) __B,
3494 (__v4di)
3495 _mm256_setzero_si256 (),
3496 (__mmask8) __U);
3497}
3498
3499extern __inline __m256i
3500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3502 __m256i __B)
3503{
3504 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3505 (__v8si) __B,
3506 (__v8si) __W,
3507 (__mmask8) __U);
3508}
3509
3510extern __inline __m256i
3511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3513{
3514 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3515 (__v8si) __B,
3516 (__v8si)
3517 _mm256_setzero_si256 (),
3518 (__mmask8) __U);
3519}
3520
3521extern __inline __m256i
3522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3524 __m256i __B)
3525{
3526 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3527 (__v4di) __B,
3528 (__v4di) __W,
3529 (__mmask8) __U);
3530}
3531
3532extern __inline __m256i
3533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3535{
3536 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3537 (__v4di) __B,
3538 (__v4di)
3539 _mm256_setzero_si256 (),
3540 (__mmask8) __U);
3541}
3542
3543extern __inline __m128i
3544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3546 __m128i __B)
3547{
3548 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3549 (__v4si) __B,
3550 (__v4si) __W,
3551 (__mmask8) __U);
3552}
3553
3554extern __inline __m128i
3555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3557{
3558 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3559 (__v4si) __B,
3560 (__v4si)
3561 _mm_setzero_si128 (),
3562 (__mmask8) __U);
3563}
3564
3565extern __inline __m128i
3566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3568 __m128i __B)
3569{
3570 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3571 (__v2di) __B,
3572 (__v2di) __W,
3573 (__mmask8) __U);
3574}
3575
3576extern __inline __m128i
3577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3579{
3580 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3581 (__v2di) __B,
3582 (__v2di)
3583 _mm_setzero_si128 (),
3584 (__mmask8) __U);
3585}
3586
3587extern __inline __m128i
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3590 __m128i __B)
3591{
3592 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3593 (__v4si) __B,
3594 (__v4si) __W,
3595 (__mmask8) __U);
3596}
3597
3598extern __inline __m128i
3599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3601{
3602 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3603 (__v4si) __B,
3604 (__v4si)
3605 _mm_setzero_si128 (),
3606 (__mmask8) __U);
3607}
3608
3609extern __inline __m128i
3610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3612 __m128i __B)
3613{
3614 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3615 (__v2di) __B,
3616 (__v2di) __W,
3617 (__mmask8) __U);
3618}
3619
3620extern __inline __m128i
3621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3623{
3624 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3625 (__v2di) __B,
3626 (__v2di)
3627 _mm_setzero_si128 (),
3628 (__mmask8) __U);
3629}
3630
3631extern __inline __m256
3632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3633_mm256_getexp_ps (__m256 __A)
3634{
3635 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3636 (__v8sf)
3637 _mm256_setzero_ps (),
3638 (__mmask8) -1);
3639}
3640
3641extern __inline __m256
3642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3643_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3644{
3645 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3646 (__v8sf) __W,
3647 (__mmask8) __U);
3648}
3649
3650extern __inline __m256
3651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3652_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3653{
3654 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3655 (__v8sf)
3656 _mm256_setzero_ps (),
3657 (__mmask8) __U);
3658}
3659
3660extern __inline __m256d
3661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3662_mm256_getexp_pd (__m256d __A)
3663{
3664 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3665 (__v4df)
3666 _mm256_setzero_pd (),
3667 (__mmask8) -1);
3668}
3669
3670extern __inline __m256d
3671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3672_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3673{
3674 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3675 (__v4df) __W,
3676 (__mmask8) __U);
3677}
3678
3679extern __inline __m256d
3680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3681_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3682{
3683 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3684 (__v4df)
3685 _mm256_setzero_pd (),
3686 (__mmask8) __U);
3687}
3688
3689extern __inline __m128
3690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3691_mm_getexp_ps (__m128 __A)
3692{
3693 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3694 (__v4sf)
3695 _mm_setzero_ps (),
3696 (__mmask8) -1);
3697}
3698
3699extern __inline __m128
3700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3701_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3702{
3703 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3704 (__v4sf) __W,
3705 (__mmask8) __U);
3706}
3707
3708extern __inline __m128
3709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3711{
3712 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3713 (__v4sf)
3714 _mm_setzero_ps (),
3715 (__mmask8) __U);
3716}
3717
3718extern __inline __m128d
3719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3720_mm_getexp_pd (__m128d __A)
3721{
3722 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3723 (__v2df)
3724 _mm_setzero_pd (),
3725 (__mmask8) -1);
3726}
3727
3728extern __inline __m128d
3729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3730_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3731{
3732 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3733 (__v2df) __W,
3734 (__mmask8) __U);
3735}
3736
3737extern __inline __m128d
3738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3739_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3740{
3741 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3742 (__v2df)
3743 _mm_setzero_pd (),
3744 (__mmask8) __U);
3745}
3746
3747extern __inline __m256i
3748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3749_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3750 __m128i __B)
3751{
3752 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3753 (__v4si) __B,
3754 (__v8si) __W,
3755 (__mmask8) __U);
3756}
3757
3758extern __inline __m256i
3759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3760_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3761{
3762 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3763 (__v4si) __B,
3764 (__v8si)
3765 _mm256_setzero_si256 (),
3766 (__mmask8) __U);
3767}
3768
3769extern __inline __m128i
3770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3772 __m128i __B)
3773{
3774 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3775 (__v4si) __B,
3776 (__v4si) __W,
3777 (__mmask8) __U);
3778}
3779
3780extern __inline __m128i
3781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3783{
3784 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3785 (__v4si) __B,
3786 (__v4si)
3787 _mm_setzero_si128 (),
3788 (__mmask8) __U);
3789}
3790
3791extern __inline __m256i
3792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3794 __m128i __B)
3795{
3796 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3797 (__v2di) __B,
3798 (__v4di) __W,
3799 (__mmask8) __U);
3800}
3801
3802extern __inline __m256i
3803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3805{
3806 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3807 (__v2di) __B,
3808 (__v4di)
3809 _mm256_setzero_si256 (),
3810 (__mmask8) __U);
3811}
3812
3813extern __inline __m128i
3814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3816 __m128i __B)
3817{
3818 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3819 (__v2di) __B,
3820 (__v2di) __W,
3821 (__mmask8) __U);
3822}
3823
3824extern __inline __m128i
3825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3826_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3827{
3828 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3829 (__v2di) __B,
3830 (__v2di)
a25a7887 3831 _mm_setzero_si128 (),
936c0fe4
AI
3832 (__mmask8) __U);
3833}
3834
3835extern __inline __m256i
3836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3838 __m256i __B)
3839{
3840 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3841 (__v8si) __B,
3842 (__v8si) __W,
3843 (__mmask8) __U);
3844}
3845
3846extern __inline __m256i
3847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3849{
3850 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3851 (__v8si) __B,
3852 (__v8si)
3853 _mm256_setzero_si256 (),
3854 (__mmask8) __U);
3855}
3856
3857extern __inline __m256d
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm256_scalef_pd (__m256d __A, __m256d __B)
3860{
3861 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3862 (__v4df) __B,
3863 (__v4df)
3864 _mm256_setzero_pd (),
3865 (__mmask8) -1);
3866}
3867
3868extern __inline __m256d
3869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3871 __m256d __B)
3872{
3873 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3874 (__v4df) __B,
3875 (__v4df) __W,
3876 (__mmask8) __U);
3877}
3878
3879extern __inline __m256d
3880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3882{
3883 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3884 (__v4df) __B,
3885 (__v4df)
3886 _mm256_setzero_pd (),
3887 (__mmask8) __U);
3888}
3889
3890extern __inline __m256
3891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892_mm256_scalef_ps (__m256 __A, __m256 __B)
3893{
3894 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3895 (__v8sf) __B,
3896 (__v8sf)
3897 _mm256_setzero_ps (),
3898 (__mmask8) -1);
3899}
3900
3901extern __inline __m256
3902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3904 __m256 __B)
3905{
3906 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3907 (__v8sf) __B,
3908 (__v8sf) __W,
3909 (__mmask8) __U);
3910}
3911
3912extern __inline __m256
3913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3914_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3915{
3916 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3917 (__v8sf) __B,
3918 (__v8sf)
3919 _mm256_setzero_ps (),
3920 (__mmask8) __U);
3921}
3922
3923extern __inline __m128d
3924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925_mm_scalef_pd (__m128d __A, __m128d __B)
3926{
3927 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3928 (__v2df) __B,
3929 (__v2df)
3930 _mm_setzero_pd (),
3931 (__mmask8) -1);
3932}
3933
3934extern __inline __m128d
3935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3937 __m128d __B)
3938{
3939 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3940 (__v2df) __B,
3941 (__v2df) __W,
3942 (__mmask8) __U);
3943}
3944
3945extern __inline __m128d
3946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3947_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3948{
3949 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3950 (__v2df) __B,
3951 (__v2df)
3952 _mm_setzero_pd (),
3953 (__mmask8) __U);
3954}
3955
3956extern __inline __m128
3957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3958_mm_scalef_ps (__m128 __A, __m128 __B)
3959{
3960 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3961 (__v4sf) __B,
3962 (__v4sf)
3963 _mm_setzero_ps (),
3964 (__mmask8) -1);
3965}
3966
3967extern __inline __m128
3968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3969_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3970{
3971 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3972 (__v4sf) __B,
3973 (__v4sf) __W,
3974 (__mmask8) __U);
3975}
3976
3977extern __inline __m128
3978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3980{
3981 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3982 (__v4sf) __B,
3983 (__v4sf)
3984 _mm_setzero_ps (),
3985 (__mmask8) __U);
3986}
3987
3988extern __inline __m256d
3989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3991 __m256d __C)
3992{
3993 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3994 (__v4df) __B,
3995 (__v4df) __C,
3996 (__mmask8) __U);
3997}
3998
3999extern __inline __m256d
4000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4002 __mmask8 __U)
4003{
4004 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4005 (__v4df) __B,
4006 (__v4df) __C,
4007 (__mmask8) __U);
4008}
4009
4010extern __inline __m256d
4011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4013 __m256d __C)
4014{
4015 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4016 (__v4df) __B,
4017 (__v4df) __C,
4018 (__mmask8) __U);
4019}
4020
4021extern __inline __m128d
4022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4023_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4024{
4025 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4026 (__v2df) __B,
4027 (__v2df) __C,
4028 (__mmask8) __U);
4029}
4030
4031extern __inline __m128d
4032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4034 __mmask8 __U)
4035{
4036 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4037 (__v2df) __B,
4038 (__v2df) __C,
4039 (__mmask8) __U);
4040}
4041
4042extern __inline __m128d
4043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4044_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4045 __m128d __C)
4046{
4047 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4048 (__v2df) __B,
4049 (__v2df) __C,
4050 (__mmask8) __U);
4051}
4052
4053extern __inline __m256
4054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4055_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4056{
4057 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4058 (__v8sf) __B,
4059 (__v8sf) __C,
4060 (__mmask8) __U);
4061}
4062
4063extern __inline __m256
4064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4066 __mmask8 __U)
4067{
4068 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4069 (__v8sf) __B,
4070 (__v8sf) __C,
4071 (__mmask8) __U);
4072}
4073
4074extern __inline __m256
4075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4077 __m256 __C)
4078{
4079 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4080 (__v8sf) __B,
4081 (__v8sf) __C,
4082 (__mmask8) __U);
4083}
4084
4085extern __inline __m128
4086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4087_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4088{
4089 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4090 (__v4sf) __B,
4091 (__v4sf) __C,
4092 (__mmask8) __U);
4093}
4094
4095extern __inline __m128
4096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4097_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4098{
4099 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4100 (__v4sf) __B,
4101 (__v4sf) __C,
4102 (__mmask8) __U);
4103}
4104
4105extern __inline __m128
4106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4107_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4108{
4109 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4110 (__v4sf) __B,
4111 (__v4sf) __C,
4112 (__mmask8) __U);
4113}
4114
4115extern __inline __m256d
4116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4117_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4118 __m256d __C)
4119{
4120 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4121 (__v4df) __B,
4122 -(__v4df) __C,
4123 (__mmask8) __U);
4124}
4125
4126extern __inline __m256d
4127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4128_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4129 __mmask8 __U)
4130{
4131 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4132 (__v4df) __B,
4133 (__v4df) __C,
4134 (__mmask8) __U);
4135}
4136
4137extern __inline __m256d
4138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4140 __m256d __C)
4141{
4142 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4143 (__v4df) __B,
4144 -(__v4df) __C,
4145 (__mmask8) __U);
4146}
4147
4148extern __inline __m128d
4149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4150_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4151{
4152 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4153 (__v2df) __B,
4154 -(__v2df) __C,
4155 (__mmask8) __U);
4156}
4157
4158extern __inline __m128d
4159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4160_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4161 __mmask8 __U)
4162{
4163 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4164 (__v2df) __B,
4165 (__v2df) __C,
4166 (__mmask8) __U);
4167}
4168
4169extern __inline __m128d
4170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4172 __m128d __C)
4173{
4174 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4175 (__v2df) __B,
4176 -(__v2df) __C,
4177 (__mmask8) __U);
4178}
4179
4180extern __inline __m256
4181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4182_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4183{
4184 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4185 (__v8sf) __B,
4186 -(__v8sf) __C,
4187 (__mmask8) __U);
4188}
4189
4190extern __inline __m256
4191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4193 __mmask8 __U)
4194{
4195 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4196 (__v8sf) __B,
4197 (__v8sf) __C,
4198 (__mmask8) __U);
4199}
4200
4201extern __inline __m256
4202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4204 __m256 __C)
4205{
4206 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4207 (__v8sf) __B,
4208 -(__v8sf) __C,
4209 (__mmask8) __U);
4210}
4211
4212extern __inline __m128
4213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4215{
4216 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4217 (__v4sf) __B,
4218 -(__v4sf) __C,
4219 (__mmask8) __U);
4220}
4221
4222extern __inline __m128
4223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4225{
4226 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4227 (__v4sf) __B,
4228 (__v4sf) __C,
4229 (__mmask8) __U);
4230}
4231
4232extern __inline __m128
4233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4235{
4236 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4237 (__v4sf) __B,
4238 -(__v4sf) __C,
4239 (__mmask8) __U);
4240}
4241
4242extern __inline __m256d
4243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4245 __m256d __C)
4246{
4247 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4248 (__v4df) __B,
4249 (__v4df) __C,
4250 (__mmask8) __U);
4251}
4252
4253extern __inline __m256d
4254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4255_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4256 __mmask8 __U)
4257{
4258 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4259 (__v4df) __B,
4260 (__v4df) __C,
4261 (__mmask8)
4262 __U);
4263}
4264
4265extern __inline __m256d
4266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4267_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4268 __m256d __C)
4269{
4270 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4271 (__v4df) __B,
4272 (__v4df) __C,
4273 (__mmask8)
4274 __U);
4275}
4276
4277extern __inline __m128d
4278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4280 __m128d __C)
4281{
4282 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4283 (__v2df) __B,
4284 (__v2df) __C,
4285 (__mmask8) __U);
4286}
4287
4288extern __inline __m128d
4289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4290_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4291 __mmask8 __U)
4292{
4293 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4294 (__v2df) __B,
4295 (__v2df) __C,
4296 (__mmask8)
4297 __U);
4298}
4299
4300extern __inline __m128d
4301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4302_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4303 __m128d __C)
4304{
4305 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4306 (__v2df) __B,
4307 (__v2df) __C,
4308 (__mmask8)
4309 __U);
4310}
4311
4312extern __inline __m256
4313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4315 __m256 __C)
4316{
4317 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4318 (__v8sf) __B,
4319 (__v8sf) __C,
4320 (__mmask8) __U);
4321}
4322
4323extern __inline __m256
4324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4326 __mmask8 __U)
4327{
4328 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4329 (__v8sf) __B,
4330 (__v8sf) __C,
4331 (__mmask8) __U);
4332}
4333
4334extern __inline __m256
4335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4337 __m256 __C)
4338{
4339 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4340 (__v8sf) __B,
4341 (__v8sf) __C,
4342 (__mmask8) __U);
4343}
4344
4345extern __inline __m128
4346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4347_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4348{
4349 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4350 (__v4sf) __B,
4351 (__v4sf) __C,
4352 (__mmask8) __U);
4353}
4354
4355extern __inline __m128
4356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4357_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4358 __mmask8 __U)
4359{
4360 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4361 (__v4sf) __B,
4362 (__v4sf) __C,
4363 (__mmask8) __U);
4364}
4365
4366extern __inline __m128
4367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4369 __m128 __C)
4370{
4371 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4372 (__v4sf) __B,
4373 (__v4sf) __C,
4374 (__mmask8) __U);
4375}
4376
4377extern __inline __m256d
4378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4380 __m256d __C)
4381{
4382 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4383 (__v4df) __B,
4384 -(__v4df) __C,
4385 (__mmask8) __U);
4386}
4387
4388extern __inline __m256d
4389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4390_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4391 __mmask8 __U)
4392{
4393 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4394 (__v4df) __B,
4395 (__v4df) __C,
4396 (__mmask8)
4397 __U);
4398}
4399
4400extern __inline __m256d
4401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4402_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4403 __m256d __C)
4404{
4405 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4406 (__v4df) __B,
4407 -(__v4df) __C,
4408 (__mmask8)
4409 __U);
4410}
4411
4412extern __inline __m128d
4413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4415 __m128d __C)
4416{
4417 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4418 (__v2df) __B,
4419 -(__v2df) __C,
4420 (__mmask8) __U);
4421}
4422
4423extern __inline __m128d
4424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4425_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4426 __mmask8 __U)
4427{
4428 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4429 (__v2df) __B,
4430 (__v2df) __C,
4431 (__mmask8)
4432 __U);
4433}
4434
4435extern __inline __m128d
4436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4437_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4438 __m128d __C)
4439{
4440 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4441 (__v2df) __B,
4442 -(__v2df) __C,
4443 (__mmask8)
4444 __U);
4445}
4446
4447extern __inline __m256
4448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4450 __m256 __C)
4451{
4452 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4453 (__v8sf) __B,
4454 -(__v8sf) __C,
4455 (__mmask8) __U);
4456}
4457
4458extern __inline __m256
4459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4461 __mmask8 __U)
4462{
4463 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4464 (__v8sf) __B,
4465 (__v8sf) __C,
4466 (__mmask8) __U);
4467}
4468
4469extern __inline __m256
4470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4472 __m256 __C)
4473{
4474 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4475 (__v8sf) __B,
4476 -(__v8sf) __C,
4477 (__mmask8) __U);
4478}
4479
4480extern __inline __m128
4481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4482_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4483{
4484 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4485 (__v4sf) __B,
4486 -(__v4sf) __C,
4487 (__mmask8) __U);
4488}
4489
4490extern __inline __m128
4491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4493 __mmask8 __U)
4494{
4495 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4496 (__v4sf) __B,
4497 (__v4sf) __C,
4498 (__mmask8) __U);
4499}
4500
4501extern __inline __m128
4502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4504 __m128 __C)
4505{
4506 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4507 (__v4sf) __B,
4508 -(__v4sf) __C,
4509 (__mmask8) __U);
4510}
4511
4512extern __inline __m256d
4513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4515 __m256d __C)
4516{
4517 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4518 (__v4df) __B,
4519 (__v4df) __C,
4520 (__mmask8) __U);
4521}
4522
4523extern __inline __m256d
4524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4526 __mmask8 __U)
4527{
4528 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
4529 (__v4df) __B,
4530 (__v4df) __C,
4531 (__mmask8) __U);
4532}
4533
4534extern __inline __m256d
4535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4537 __m256d __C)
4538{
4539 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4540 (__v4df) __B,
4541 (__v4df) __C,
4542 (__mmask8) __U);
4543}
4544
4545extern __inline __m128d
4546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4548 __m128d __C)
4549{
4550 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4551 (__v2df) __B,
4552 (__v2df) __C,
4553 (__mmask8) __U);
4554}
4555
4556extern __inline __m128d
4557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4558_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4559 __mmask8 __U)
4560{
4561 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
4562 (__v2df) __B,
4563 (__v2df) __C,
4564 (__mmask8) __U);
4565}
4566
4567extern __inline __m128d
4568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4570 __m128d __C)
4571{
4572 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4573 (__v2df) __B,
4574 (__v2df) __C,
4575 (__mmask8) __U);
4576}
4577
4578extern __inline __m256
4579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4581 __m256 __C)
4582{
4583 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4584 (__v8sf) __B,
4585 (__v8sf) __C,
4586 (__mmask8) __U);
4587}
4588
4589extern __inline __m256
4590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4592 __mmask8 __U)
4593{
4594 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
4595 (__v8sf) __B,
4596 (__v8sf) __C,
4597 (__mmask8) __U);
4598}
4599
4600extern __inline __m256
4601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4602_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4603 __m256 __C)
4604{
4605 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4606 (__v8sf) __B,
4607 (__v8sf) __C,
4608 (__mmask8) __U);
4609}
4610
4611extern __inline __m128
4612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4613_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4614{
4615 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4616 (__v4sf) __B,
4617 (__v4sf) __C,
4618 (__mmask8) __U);
4619}
4620
4621extern __inline __m128
4622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4623_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4624{
4625 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
4626 (__v4sf) __B,
4627 (__v4sf) __C,
4628 (__mmask8) __U);
4629}
4630
4631extern __inline __m128
4632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4634{
4635 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4636 (__v4sf) __B,
4637 (__v4sf) __C,
4638 (__mmask8) __U);
4639}
4640
4641extern __inline __m256d
4642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4644 __m256d __C)
4645{
4646 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4647 (__v4df) __B,
4648 (__v4df) __C,
4649 (__mmask8) __U);
4650}
4651
4652extern __inline __m256d
4653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4655 __mmask8 __U)
4656{
4657 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4658 (__v4df) __B,
4659 (__v4df) __C,
4660 (__mmask8) __U);
4661}
4662
4663extern __inline __m256d
4664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4666 __m256d __C)
4667{
4668 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4669 (__v4df) __B,
4670 -(__v4df) __C,
4671 (__mmask8) __U);
4672}
4673
4674extern __inline __m128d
4675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4677 __m128d __C)
4678{
4679 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4680 (__v2df) __B,
4681 (__v2df) __C,
4682 (__mmask8) __U);
4683}
4684
4685extern __inline __m128d
4686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4688 __mmask8 __U)
4689{
4690 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4691 (__v2df) __B,
4692 (__v2df) __C,
4693 (__mmask8) __U);
4694}
4695
4696extern __inline __m128d
4697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4699 __m128d __C)
4700{
4701 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4702 (__v2df) __B,
4703 -(__v2df) __C,
4704 (__mmask8) __U);
4705}
4706
4707extern __inline __m256
4708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4710 __m256 __C)
4711{
4712 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4713 (__v8sf) __B,
4714 (__v8sf) __C,
4715 (__mmask8) __U);
4716}
4717
4718extern __inline __m256
4719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4721 __mmask8 __U)
4722{
4723 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4724 (__v8sf) __B,
4725 (__v8sf) __C,
4726 (__mmask8) __U);
4727}
4728
4729extern __inline __m256
4730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4731_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4732 __m256 __C)
4733{
4734 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4735 (__v8sf) __B,
4736 -(__v8sf) __C,
4737 (__mmask8) __U);
4738}
4739
4740extern __inline __m128
4741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4743{
4744 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4745 (__v4sf) __B,
4746 (__v4sf) __C,
4747 (__mmask8) __U);
4748}
4749
4750extern __inline __m128
4751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4752_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4753{
4754 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4755 (__v4sf) __B,
4756 (__v4sf) __C,
4757 (__mmask8) __U);
4758}
4759
4760extern __inline __m128
4761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4763{
4764 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4765 (__v4sf) __B,
4766 -(__v4sf) __C,
4767 (__mmask8) __U);
4768}
4769
4770extern __inline __m128i
4771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4773 __m128i __B)
4774{
4775 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4776 (__v4si) __B,
4777 (__v4si) __W,
4778 (__mmask8) __U);
4779}
4780
4781extern __inline __m128i
4782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4784{
4785 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4786 (__v4si) __B,
4787 (__v4si)
4788 _mm_setzero_si128 (),
4789 (__mmask8) __U);
4790}
4791
4792extern __inline __m256i
4793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4795 __m256i __B)
4796{
4797 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4798 (__v8si) __B,
4799 (__v8si) __W,
4800 (__mmask8) __U);
4801}
4802
4803extern __inline __m256i
4804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4806{
4807 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4808 (__v8si) __B,
4809 (__v8si)
4810 _mm256_setzero_si256 (),
4811 (__mmask8) __U);
4812}
4813
4814extern __inline __m128i
4815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4817 __m128i __B)
4818{
4819 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4820 (__v4si) __B,
4821 (__v4si) __W,
4822 (__mmask8) __U);
4823}
4824
4825extern __inline __m128i
4826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4828{
4829 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4830 (__v4si) __B,
4831 (__v4si)
4832 _mm_setzero_si128 (),
4833 (__mmask8) __U);
4834}
4835
4836extern __inline __m256i
4837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4839 __m256i __B)
4840{
4841 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4842 (__v8si) __B,
4843 (__v8si) __W,
4844 (__mmask8) __U);
4845}
4846
4847extern __inline __m256i
4848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4850{
4851 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4852 (__v8si) __B,
4853 (__v8si)
4854 _mm256_setzero_si256 (),
4855 (__mmask8) __U);
4856}
4857
01fd9f8d
L
4858extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4859_mm256_or_epi32 (__m256i __A, __m256i __B)
4860{
4861 return (__m256i) ((__v8su)__A | (__v8su)__B);
4862}
4863
936c0fe4
AI
4864extern __inline __m128i
4865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4866_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4867{
4868 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4869 (__v4si) __B,
4870 (__v4si) __W,
4871 (__mmask8) __U);
4872}
4873
4874extern __inline __m128i
4875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4876_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4877{
4878 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4879 (__v4si) __B,
4880 (__v4si)
4881 _mm_setzero_si128 (),
4882 (__mmask8) __U);
4883}
4884
01fd9f8d
L
4885extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4886_mm_or_epi32 (__m128i __A, __m128i __B)
4887{
4888 return (__m128i) ((__v4su)__A | (__v4su)__B);
4889}
4890
936c0fe4
AI
4891extern __inline __m256i
4892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4893_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4894 __m256i __B)
4895{
4896 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4897 (__v8si) __B,
4898 (__v8si) __W,
4899 (__mmask8) __U);
4900}
4901
4902extern __inline __m256i
4903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4905{
4906 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4907 (__v8si) __B,
4908 (__v8si)
4909 _mm256_setzero_si256 (),
4910 (__mmask8) __U);
4911}
4912
01fd9f8d
L
4913extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4914_mm256_xor_epi32 (__m256i __A, __m256i __B)
4915{
4916 return (__m256i) ((__v8su)__A ^ (__v8su)__B);
4917}
4918
936c0fe4
AI
4919extern __inline __m128i
4920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4922 __m128i __B)
4923{
4924 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4925 (__v4si) __B,
4926 (__v4si) __W,
4927 (__mmask8) __U);
4928}
4929
4930extern __inline __m128i
4931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4932_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4933{
4934 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4935 (__v4si) __B,
4936 (__v4si)
4937 _mm_setzero_si128 (),
4938 (__mmask8) __U);
4939}
4940
01fd9f8d
L
4941extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4942_mm_xor_epi32 (__m128i __A, __m128i __B)
4943{
4944 return (__m128i) ((__v4su)__A ^ (__v4su)__B);
4945}
4946
936c0fe4
AI
4947extern __inline __m128
4948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4949_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4950{
4951 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4952 (__v4sf) __W,
4953 (__mmask8) __U);
4954}
4955
4956extern __inline __m128
4957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4959{
4960 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4961 (__v4sf)
4962 _mm_setzero_ps (),
4963 (__mmask8) __U);
4964}
4965
4966extern __inline __m128
4967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4968_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4969{
4970 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4971 (__v4sf) __W,
4972 (__mmask8) __U);
4973}
4974
4975extern __inline __m128
4976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4978{
4979 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4980 (__v4sf)
4981 _mm_setzero_ps (),
4982 (__mmask8) __U);
4983}
4984
4985extern __inline __m256i
4986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4987_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4988{
4989 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4990 (__v8si) __W,
4991 (__mmask8) __U);
4992}
4993
4994extern __inline __m256i
4995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4997{
4998 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4999 (__v8si)
5000 _mm256_setzero_si256 (),
5001 (__mmask8) __U);
5002}
5003
5004extern __inline __m128i
5005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5006_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
5007{
5008 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5009 (__v4si) __W,
5010 (__mmask8) __U);
5011}
5012
5013extern __inline __m128i
5014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5015_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
5016{
5017 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5018 (__v4si)
5019 _mm_setzero_si128 (),
5020 (__mmask8) __U);
5021}
5022
5023extern __inline __m256i
5024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5025_mm256_cvtps_epu32 (__m256 __A)
5026{
5027 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5028 (__v8si)
5029 _mm256_setzero_si256 (),
5030 (__mmask8) -1);
5031}
5032
5033extern __inline __m256i
5034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5036{
5037 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5038 (__v8si) __W,
5039 (__mmask8) __U);
5040}
5041
5042extern __inline __m256i
5043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5045{
5046 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5047 (__v8si)
5048 _mm256_setzero_si256 (),
5049 (__mmask8) __U);
5050}
5051
5052extern __inline __m128i
5053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5054_mm_cvtps_epu32 (__m128 __A)
5055{
5056 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5057 (__v4si)
5058 _mm_setzero_si128 (),
5059 (__mmask8) -1);
5060}
5061
5062extern __inline __m128i
5063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5064_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5065{
5066 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5067 (__v4si) __W,
5068 (__mmask8) __U);
5069}
5070
5071extern __inline __m128i
5072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5074{
5075 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5076 (__v4si)
5077 _mm_setzero_si128 (),
5078 (__mmask8) __U);
5079}
5080
5081extern __inline __m256d
5082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5084{
5085 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5086 (__v4df) __W,
5087 (__mmask8) __U);
5088}
5089
5090extern __inline __m256d
5091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5093{
5094 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5095 (__v4df)
5096 _mm256_setzero_pd (),
5097 (__mmask8) __U);
5098}
5099
5100extern __inline __m128d
5101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5102_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5103{
5104 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5105 (__v2df) __W,
5106 (__mmask8) __U);
5107}
5108
5109extern __inline __m128d
5110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5112{
5113 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5114 (__v2df)
5115 _mm_setzero_pd (),
5116 (__mmask8) __U);
5117}
5118
5119extern __inline __m256
5120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5122{
5123 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5124 (__v8sf) __W,
5125 (__mmask8) __U);
5126}
5127
5128extern __inline __m256
5129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5130_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5131{
5132 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5133 (__v8sf)
5134 _mm256_setzero_ps (),
5135 (__mmask8) __U);
5136}
5137
5138extern __inline __m128
5139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5141{
5142 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5143 (__v4sf) __W,
5144 (__mmask8) __U);
5145}
5146
5147extern __inline __m128
5148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5150{
5151 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5152 (__v4sf)
5153 _mm_setzero_ps (),
5154 (__mmask8) __U);
5155}
5156
5157extern __inline __m256
5158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5159_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5160{
5161 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5162 (__v8sf) __W,
5163 (__mmask8) __U);
5164}
5165
5166extern __inline __m256
5167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5169{
5170 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5171 (__v8sf)
5172 _mm256_setzero_ps (),
5173 (__mmask8) __U);
5174}
5175
5176extern __inline __m128
5177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5178_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5179{
5180 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5181 (__v4sf) __W,
5182 (__mmask8) __U);
5183}
5184
5185extern __inline __m128
5186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5188{
5189 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5190 (__v4sf)
5191 _mm_setzero_ps (),
5192 (__mmask8) __U);
5193}
5194
5195extern __inline __m128i
5196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5198 __m128i __B)
5199{
5200 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5201 (__v4si) __B,
5202 (__v4si) __W,
5203 (__mmask8) __U);
5204}
5205
5206extern __inline __m128i
5207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5208_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5209{
5210 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5211 (__v4si) __B,
5212 (__v4si)
5213 _mm_setzero_si128 (),
5214 (__mmask8) __U);
5215}
5216
5217extern __inline __m256i
5218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5219_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5220 __m256i __B)
5221{
5222 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5223 (__v8si) __B,
5224 (__v8si) __W,
5225 (__mmask8) __U);
5226}
5227
5228extern __inline __m256i
5229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5230_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5231{
5232 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5233 (__v8si) __B,
5234 (__v8si)
5235 _mm256_setzero_si256 (),
5236 (__mmask8) __U);
5237}
5238
5239extern __inline __m128i
5240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5241_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5242 __m128i __B)
5243{
5244 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5245 (__v2di) __B,
5246 (__v2di) __W,
5247 (__mmask8) __U);
5248}
5249
5250extern __inline __m128i
5251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5252_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5253{
5254 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5255 (__v2di) __B,
5256 (__v2di)
a25a7887 5257 _mm_setzero_si128 (),
936c0fe4
AI
5258 (__mmask8) __U);
5259}
5260
5261extern __inline __m256i
5262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5263_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5264 __m256i __B)
5265{
5266 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5267 (__v4di) __B,
5268 (__v4di) __W,
5269 (__mmask8) __U);
5270}
5271
5272extern __inline __m256i
5273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5274_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5275{
5276 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5277 (__v4di) __B,
5278 (__v4di)
5279 _mm256_setzero_si256 (),
5280 (__mmask8) __U);
5281}
5282
5283extern __inline __m128i
5284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5285_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5286 __m128i __B)
5287{
5288 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5289 (__v4si) __B,
5290 (__v4si) __W,
5291 (__mmask8) __U);
5292}
5293
5294extern __inline __m128i
5295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5296_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5297{
5298 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5299 (__v4si) __B,
5300 (__v4si)
5301 _mm_setzero_si128 (),
5302 (__mmask8) __U);
5303}
5304
5305extern __inline __m256i
5306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5307_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5308 __m256i __B)
5309{
5310 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5311 (__v8si) __B,
5312 (__v8si) __W,
5313 (__mmask8) __U);
5314}
5315
5316extern __inline __m256i
5317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5318_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5319{
5320 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5321 (__v8si) __B,
5322 (__v8si)
5323 _mm256_setzero_si256 (),
5324 (__mmask8) __U);
5325}
5326
5327extern __inline __m128i
5328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5329_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5330 __m128i __B)
5331{
5332 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5333 (__v2di) __B,
5334 (__v2di) __W,
5335 (__mmask8) __U);
5336}
5337
5338extern __inline __m128i
5339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5341{
5342 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5343 (__v2di) __B,
5344 (__v2di)
a25a7887 5345 _mm_setzero_si128 (),
936c0fe4
AI
5346 (__mmask8) __U);
5347}
5348
5349extern __inline __m256i
5350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5351_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5352 __m256i __B)
5353{
5354 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5355 (__v4di) __B,
5356 (__v4di) __W,
5357 (__mmask8) __U);
5358}
5359
5360extern __inline __m256i
5361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5362_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5363{
5364 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5365 (__v4di) __B,
5366 (__v4di)
5367 _mm256_setzero_si256 (),
5368 (__mmask8) __U);
5369}
5370
eee5d6f5
AI
5371extern __inline __mmask8
5372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5373_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5374{
5375 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5376 (__v4si) __B, 0,
5377 (__mmask8) -1);
5378}
5379
936c0fe4
AI
5380extern __inline __mmask8
5381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5382_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5383{
5384 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5385 (__v4si) __B,
5386 (__mmask8) -1);
5387}
5388
eee5d6f5
AI
5389extern __inline __mmask8
5390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5391_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5392{
5393 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5394 (__v4si) __B, 0, __U);
5395}
5396
936c0fe4
AI
5397extern __inline __mmask8
5398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5399_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5400{
5401 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5402 (__v4si) __B, __U);
5403}
5404
eee5d6f5
AI
5405extern __inline __mmask8
5406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5407_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5408{
5409 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5410 (__v8si) __B, 0,
5411 (__mmask8) -1);
5412}
5413
936c0fe4
AI
5414extern __inline __mmask8
5415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5416_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5417{
5418 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5419 (__v8si) __B,
5420 (__mmask8) -1);
5421}
5422
eee5d6f5
AI
5423extern __inline __mmask8
5424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5426{
5427 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5428 (__v8si) __B, 0, __U);
5429}
5430
936c0fe4
AI
5431extern __inline __mmask8
5432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5433_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5434{
5435 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5436 (__v8si) __B, __U);
5437}
5438
eee5d6f5
AI
5439extern __inline __mmask8
5440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5441_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5442{
5443 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5444 (__v2di) __B, 0,
5445 (__mmask8) -1);
5446}
5447
936c0fe4
AI
5448extern __inline __mmask8
5449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5450_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5451{
5452 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5453 (__v2di) __B,
5454 (__mmask8) -1);
5455}
5456
eee5d6f5
AI
5457extern __inline __mmask8
5458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5459_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5460{
5461 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5462 (__v2di) __B, 0, __U);
5463}
5464
936c0fe4
AI
5465extern __inline __mmask8
5466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5467_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5468{
5469 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5470 (__v2di) __B, __U);
5471}
5472
eee5d6f5
AI
5473extern __inline __mmask8
5474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5475_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5476{
5477 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5478 (__v4di) __B, 0,
5479 (__mmask8) -1);
5480}
5481
936c0fe4
AI
5482extern __inline __mmask8
5483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5484_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5485{
5486 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5487 (__v4di) __B,
5488 (__mmask8) -1);
5489}
5490
eee5d6f5
AI
5491extern __inline __mmask8
5492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5493_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5494{
5495 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5496 (__v4di) __B, 0, __U);
5497}
5498
936c0fe4
AI
5499extern __inline __mmask8
5500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5501_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5502{
5503 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5504 (__v4di) __B, __U);
5505}
5506
eee5d6f5
AI
5507extern __inline __mmask8
5508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5509_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5510{
5511 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5512 (__v4si) __B, 6,
5513 (__mmask8) -1);
5514}
5515
936c0fe4
AI
5516extern __inline __mmask8
5517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5518_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5519{
5520 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5521 (__v4si) __B,
5522 (__mmask8) -1);
5523}
5524
eee5d6f5
AI
5525extern __inline __mmask8
5526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5527_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5528{
5529 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5530 (__v4si) __B, 6, __U);
5531}
5532
936c0fe4
AI
5533extern __inline __mmask8
5534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5535_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5536{
5537 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5538 (__v4si) __B, __U);
5539}
5540
eee5d6f5
AI
5541extern __inline __mmask8
5542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5544{
5545 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5546 (__v8si) __B, 6,
5547 (__mmask8) -1);
5548}
5549
936c0fe4
AI
5550extern __inline __mmask8
5551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5552_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5553{
5554 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5555 (__v8si) __B,
5556 (__mmask8) -1);
5557}
5558
eee5d6f5
AI
5559extern __inline __mmask8
5560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5561_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5562{
5563 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5564 (__v8si) __B, 6, __U);
5565}
5566
936c0fe4
AI
5567extern __inline __mmask8
5568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5569_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5570{
5571 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5572 (__v8si) __B, __U);
5573}
5574
eee5d6f5
AI
5575extern __inline __mmask8
5576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5578{
5579 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5580 (__v2di) __B, 6,
5581 (__mmask8) -1);
5582}
5583
936c0fe4
AI
5584extern __inline __mmask8
5585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5586_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5587{
5588 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5589 (__v2di) __B,
5590 (__mmask8) -1);
5591}
5592
eee5d6f5
AI
5593extern __inline __mmask8
5594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5595_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5596{
5597 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5598 (__v2di) __B, 6, __U);
5599}
5600
936c0fe4
AI
5601extern __inline __mmask8
5602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5604{
5605 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5606 (__v2di) __B, __U);
5607}
5608
eee5d6f5
AI
5609extern __inline __mmask8
5610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5611_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5612{
5613 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5614 (__v4di) __B, 6,
5615 (__mmask8) -1);
5616}
5617
936c0fe4
AI
5618extern __inline __mmask8
5619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5620_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5621{
5622 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5623 (__v4di) __B,
5624 (__mmask8) -1);
5625}
5626
eee5d6f5
AI
5627extern __inline __mmask8
5628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5630{
5631 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5632 (__v4di) __B, 6, __U);
5633}
5634
936c0fe4
AI
5635extern __inline __mmask8
5636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5637_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5638{
5639 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5640 (__v4di) __B, __U);
5641}
5642
5643extern __inline __mmask8
5644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5645_mm_test_epi32_mask (__m128i __A, __m128i __B)
5646{
5647 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5648 (__v4si) __B,
5649 (__mmask8) -1);
5650}
5651
5652extern __inline __mmask8
5653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5654_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5655{
5656 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5657 (__v4si) __B, __U);
5658}
5659
5660extern __inline __mmask8
5661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5662_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5663{
5664 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5665 (__v8si) __B,
5666 (__mmask8) -1);
5667}
5668
5669extern __inline __mmask8
5670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5671_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5672{
5673 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5674 (__v8si) __B, __U);
5675}
5676
5677extern __inline __mmask8
5678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5679_mm_test_epi64_mask (__m128i __A, __m128i __B)
5680{
5681 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5682 (__v2di) __B,
5683 (__mmask8) -1);
5684}
5685
5686extern __inline __mmask8
5687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5688_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5689{
5690 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5691 (__v2di) __B, __U);
5692}
5693
5694extern __inline __mmask8
5695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5697{
5698 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5699 (__v4di) __B,
5700 (__mmask8) -1);
5701}
5702
5703extern __inline __mmask8
5704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5705_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5706{
5707 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5708 (__v4di) __B, __U);
5709}
5710
5711extern __inline __mmask8
5712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5713_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5714{
5715 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5716 (__v4si) __B,
5717 (__mmask8) -1);
5718}
5719
5720extern __inline __mmask8
5721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5722_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5723{
5724 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5725 (__v4si) __B, __U);
5726}
5727
5728extern __inline __mmask8
5729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5730_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5731{
5732 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5733 (__v8si) __B,
5734 (__mmask8) -1);
5735}
5736
5737extern __inline __mmask8
5738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5739_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5740{
5741 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5742 (__v8si) __B, __U);
5743}
5744
5745extern __inline __mmask8
5746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5748{
5749 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5750 (__v2di) __B,
5751 (__mmask8) -1);
5752}
5753
5754extern __inline __mmask8
5755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5756_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5757{
5758 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5759 (__v2di) __B, __U);
5760}
5761
5762extern __inline __mmask8
5763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5764_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5765{
5766 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5767 (__v4di) __B,
5768 (__mmask8) -1);
5769}
5770
5771extern __inline __mmask8
5772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5773_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5774{
5775 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5776 (__v4di) __B, __U);
5777}
5778
5779extern __inline __m256d
5780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5782{
5783 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5784 (__v4df) __W,
5785 (__mmask8) __U);
5786}
5787
5788extern __inline __m256d
5789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5791{
5792 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5793 (__v4df)
5794 _mm256_setzero_pd (),
5795 (__mmask8) __U);
5796}
5797
5798extern __inline void
5799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5800_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5801{
5802 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5803 (__v4df) __A,
5804 (__mmask8) __U);
5805}
5806
5807extern __inline __m128d
5808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5810{
5811 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5812 (__v2df) __W,
5813 (__mmask8) __U);
5814}
5815
5816extern __inline __m128d
5817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5819{
5820 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5821 (__v2df)
5822 _mm_setzero_pd (),
5823 (__mmask8) __U);
5824}
5825
5826extern __inline void
5827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5828_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5829{
5830 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5831 (__v2df) __A,
5832 (__mmask8) __U);
5833}
5834
5835extern __inline __m256
5836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5838{
5839 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5840 (__v8sf) __W,
5841 (__mmask8) __U);
5842}
5843
5844extern __inline __m256
5845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5847{
5848 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5849 (__v8sf)
5850 _mm256_setzero_ps (),
5851 (__mmask8) __U);
5852}
5853
5854extern __inline void
5855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5857{
5858 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5859 (__v8sf) __A,
5860 (__mmask8) __U);
5861}
5862
5863extern __inline __m128
5864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5865_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5866{
5867 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5868 (__v4sf) __W,
5869 (__mmask8) __U);
5870}
5871
5872extern __inline __m128
5873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5874_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5875{
5876 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5877 (__v4sf)
5878 _mm_setzero_ps (),
5879 (__mmask8) __U);
5880}
5881
5882extern __inline void
5883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5884_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5885{
5886 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5887 (__v4sf) __A,
5888 (__mmask8) __U);
5889}
5890
5891extern __inline __m256i
5892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5893_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5894{
5895 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5896 (__v4di) __W,
5897 (__mmask8) __U);
5898}
5899
5900extern __inline __m256i
5901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5902_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5903{
5904 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5905 (__v4di)
5906 _mm256_setzero_si256 (),
5907 (__mmask8) __U);
5908}
5909
5910extern __inline void
5911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5912_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5913{
5914 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5915 (__v4di) __A,
5916 (__mmask8) __U);
5917}
5918
5919extern __inline __m128i
5920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5922{
5923 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5924 (__v2di) __W,
5925 (__mmask8) __U);
5926}
5927
5928extern __inline __m128i
5929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5931{
5932 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5933 (__v2di)
a25a7887 5934 _mm_setzero_si128 (),
936c0fe4
AI
5935 (__mmask8) __U);
5936}
5937
5938extern __inline void
5939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5941{
5942 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5943 (__v2di) __A,
5944 (__mmask8) __U);
5945}
5946
5947extern __inline __m256i
5948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5950{
5951 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5952 (__v8si) __W,
5953 (__mmask8) __U);
5954}
5955
5956extern __inline __m256i
5957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5958_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5959{
5960 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5961 (__v8si)
5962 _mm256_setzero_si256 (),
5963 (__mmask8) __U);
5964}
5965
5966extern __inline void
5967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5969{
5970 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5971 (__v8si) __A,
5972 (__mmask8) __U);
5973}
5974
5975extern __inline __m128i
5976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5977_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5978{
5979 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5980 (__v4si) __W,
5981 (__mmask8) __U);
5982}
5983
5984extern __inline __m128i
5985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5987{
5988 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5989 (__v4si)
5990 _mm_setzero_si128 (),
5991 (__mmask8) __U);
5992}
5993
5994extern __inline void
5995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5997{
5998 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5999 (__v4si) __A,
6000 (__mmask8) __U);
6001}
6002
6003extern __inline __m256d
6004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6005_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
6006{
6007 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
6008 (__v4df) __W,
6009 (__mmask8) __U);
6010}
6011
6012extern __inline __m256d
6013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6014_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
6015{
6016 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
6017 (__v4df)
6018 _mm256_setzero_pd (),
6019 (__mmask8) __U);
6020}
6021
6022extern __inline __m256d
6023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6024_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6025{
6026 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
6027 (__v4df) __W,
6028 (__mmask8)
6029 __U);
6030}
6031
6032extern __inline __m256d
6033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6034_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6035{
6036 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6037 (__v4df)
6038 _mm256_setzero_pd (),
6039 (__mmask8)
6040 __U);
6041}
6042
6043extern __inline __m128d
6044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6045_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6046{
6047 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6048 (__v2df) __W,
6049 (__mmask8) __U);
6050}
6051
6052extern __inline __m128d
6053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6054_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6055{
6056 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6057 (__v2df)
6058 _mm_setzero_pd (),
6059 (__mmask8) __U);
6060}
6061
6062extern __inline __m128d
6063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6064_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6065{
6066 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6067 (__v2df) __W,
6068 (__mmask8)
6069 __U);
6070}
6071
6072extern __inline __m128d
6073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6074_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6075{
6076 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6077 (__v2df)
6078 _mm_setzero_pd (),
6079 (__mmask8)
6080 __U);
6081}
6082
6083extern __inline __m256
6084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6085_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6086{
6087 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6088 (__v8sf) __W,
6089 (__mmask8) __U);
6090}
6091
6092extern __inline __m256
6093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6094_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6095{
6096 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6097 (__v8sf)
6098 _mm256_setzero_ps (),
6099 (__mmask8) __U);
6100}
6101
6102extern __inline __m256
6103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6104_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6105{
6106 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6107 (__v8sf) __W,
6108 (__mmask8) __U);
6109}
6110
6111extern __inline __m256
6112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6113_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6114{
6115 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6116 (__v8sf)
6117 _mm256_setzero_ps (),
6118 (__mmask8)
6119 __U);
6120}
6121
6122extern __inline __m128
6123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6124_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6125{
6126 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6127 (__v4sf) __W,
6128 (__mmask8) __U);
6129}
6130
6131extern __inline __m128
6132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6133_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6134{
6135 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6136 (__v4sf)
6137 _mm_setzero_ps (),
6138 (__mmask8) __U);
6139}
6140
6141extern __inline __m128
6142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6143_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6144{
6145 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6146 (__v4sf) __W,
6147 (__mmask8) __U);
6148}
6149
6150extern __inline __m128
6151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6153{
6154 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6155 (__v4sf)
6156 _mm_setzero_ps (),
6157 (__mmask8)
6158 __U);
6159}
6160
6161extern __inline __m256i
6162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6163_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6164{
6165 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6166 (__v4di) __W,
6167 (__mmask8) __U);
6168}
6169
6170extern __inline __m256i
6171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6173{
6174 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6175 (__v4di)
6176 _mm256_setzero_si256 (),
6177 (__mmask8) __U);
6178}
6179
6180extern __inline __m256i
6181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6183 void const *__P)
6184{
6185 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6186 (__v4di) __W,
6187 (__mmask8)
6188 __U);
6189}
6190
6191extern __inline __m256i
6192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6193_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6194{
6195 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6196 (__v4di)
6197 _mm256_setzero_si256 (),
6198 (__mmask8)
6199 __U);
6200}
6201
6202extern __inline __m128i
6203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6204_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6205{
6206 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6207 (__v2di) __W,
6208 (__mmask8) __U);
6209}
6210
6211extern __inline __m128i
6212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6213_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6214{
6215 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6216 (__v2di)
6217 _mm_setzero_si128 (),
6218 (__mmask8) __U);
6219}
6220
6221extern __inline __m128i
6222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6223_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6224{
6225 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6226 (__v2di) __W,
6227 (__mmask8)
6228 __U);
6229}
6230
6231extern __inline __m128i
6232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6233_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6234{
6235 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6236 (__v2di)
6237 _mm_setzero_si128 (),
6238 (__mmask8)
6239 __U);
6240}
6241
6242extern __inline __m256i
6243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6244_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6245{
6246 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6247 (__v8si) __W,
6248 (__mmask8) __U);
6249}
6250
6251extern __inline __m256i
6252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6253_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6254{
6255 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6256 (__v8si)
6257 _mm256_setzero_si256 (),
6258 (__mmask8) __U);
6259}
6260
6261extern __inline __m256i
6262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6263_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6264 void const *__P)
6265{
6266 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6267 (__v8si) __W,
6268 (__mmask8)
6269 __U);
6270}
6271
6272extern __inline __m256i
6273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6274_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6275{
6276 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6277 (__v8si)
6278 _mm256_setzero_si256 (),
6279 (__mmask8)
6280 __U);
6281}
6282
6283extern __inline __m128i
6284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6285_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6286{
6287 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6288 (__v4si) __W,
6289 (__mmask8) __U);
6290}
6291
6292extern __inline __m128i
6293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6294_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6295{
6296 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6297 (__v4si)
6298 _mm_setzero_si128 (),
6299 (__mmask8) __U);
6300}
6301
6302extern __inline __m128i
6303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6304_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6305{
6306 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6307 (__v4si) __W,
6308 (__mmask8)
6309 __U);
6310}
6311
6312extern __inline __m128i
6313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6314_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6315{
6316 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6317 (__v4si)
6318 _mm_setzero_si128 (),
6319 (__mmask8)
6320 __U);
6321}
6322
6323extern __inline __m256d
6324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6326{
6327 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6328 /* idx */ ,
6329 (__v4df) __A,
6330 (__v4df) __B,
c42b0bdf 6331 (__mmask8) -1);
936c0fe4
AI
6332}
6333
6334extern __inline __m256d
6335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6336_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6337 __m256d __B)
6338{
6339 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6340 /* idx */ ,
6341 (__v4df) __A,
6342 (__v4df) __B,
6343 (__mmask8)
6344 __U);
6345}
6346
6347extern __inline __m256d
6348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6349_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6350 __m256d __B)
6351{
6352 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6353 (__v4di) __I
6354 /* idx */ ,
6355 (__v4df) __B,
6356 (__mmask8)
6357 __U);
6358}
6359
6360extern __inline __m256d
6361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6362_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6363 __m256d __B)
6364{
6365 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6366 /* idx */ ,
6367 (__v4df) __A,
6368 (__v4df) __B,
6369 (__mmask8)
6370 __U);
6371}
6372
6373extern __inline __m256
6374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6375_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6376{
6377 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6378 /* idx */ ,
6379 (__v8sf) __A,
6380 (__v8sf) __B,
6381 (__mmask8) -1);
6382}
6383
6384extern __inline __m256
6385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6386_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6387 __m256 __B)
6388{
6389 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6390 /* idx */ ,
6391 (__v8sf) __A,
6392 (__v8sf) __B,
6393 (__mmask8) __U);
6394}
6395
6396extern __inline __m256
6397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6398_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6399 __m256 __B)
6400{
6401 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6402 (__v8si) __I
6403 /* idx */ ,
6404 (__v8sf) __B,
6405 (__mmask8) __U);
6406}
6407
6408extern __inline __m256
6409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6410_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6411 __m256 __B)
6412{
6413 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6414 /* idx */ ,
6415 (__v8sf) __A,
6416 (__v8sf) __B,
6417 (__mmask8)
6418 __U);
6419}
6420
6421extern __inline __m128i
6422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6424{
6425 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6426 /* idx */ ,
6427 (__v2di) __A,
6428 (__v2di) __B,
6429 (__mmask8) -1);
6430}
6431
6432extern __inline __m128i
6433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6435 __m128i __B)
6436{
6437 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6438 /* idx */ ,
6439 (__v2di) __A,
6440 (__v2di) __B,
6441 (__mmask8) __U);
6442}
6443
6444extern __inline __m128i
6445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6447 __m128i __B)
6448{
6449 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6450 (__v2di) __I
6451 /* idx */ ,
6452 (__v2di) __B,
6453 (__mmask8) __U);
6454}
6455
6456extern __inline __m128i
6457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6458_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6459 __m128i __B)
6460{
6461 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6462 /* idx */ ,
6463 (__v2di) __A,
6464 (__v2di) __B,
6465 (__mmask8)
6466 __U);
6467}
6468
6469extern __inline __m128i
6470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6471_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6472{
6473 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6474 /* idx */ ,
6475 (__v4si) __A,
6476 (__v4si) __B,
6477 (__mmask8) -1);
6478}
6479
6480extern __inline __m128i
6481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6482_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6483 __m128i __B)
6484{
6485 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6486 /* idx */ ,
6487 (__v4si) __A,
6488 (__v4si) __B,
6489 (__mmask8) __U);
6490}
6491
6492extern __inline __m128i
6493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6494_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6495 __m128i __B)
6496{
6497 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6498 (__v4si) __I
6499 /* idx */ ,
6500 (__v4si) __B,
6501 (__mmask8) __U);
6502}
6503
6504extern __inline __m128i
6505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6506_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6507 __m128i __B)
6508{
6509 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6510 /* idx */ ,
6511 (__v4si) __A,
6512 (__v4si) __B,
6513 (__mmask8)
6514 __U);
6515}
6516
6517extern __inline __m256i
6518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6519_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6520{
6521 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6522 /* idx */ ,
6523 (__v4di) __A,
6524 (__v4di) __B,
6525 (__mmask8) -1);
6526}
6527
6528extern __inline __m256i
6529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6531 __m256i __B)
6532{
6533 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6534 /* idx */ ,
6535 (__v4di) __A,
6536 (__v4di) __B,
6537 (__mmask8) __U);
6538}
6539
6540extern __inline __m256i
6541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6542_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6543 __mmask8 __U, __m256i __B)
6544{
6545 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6546 (__v4di) __I
6547 /* idx */ ,
6548 (__v4di) __B,
6549 (__mmask8) __U);
6550}
6551
6552extern __inline __m256i
6553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6554_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6555 __m256i __I, __m256i __B)
6556{
6557 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6558 /* idx */ ,
6559 (__v4di) __A,
6560 (__v4di) __B,
6561 (__mmask8)
6562 __U);
6563}
6564
6565extern __inline __m256i
6566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6567_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6568{
6569 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6570 /* idx */ ,
6571 (__v8si) __A,
6572 (__v8si) __B,
6573 (__mmask8) -1);
6574}
6575
6576extern __inline __m256i
6577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6578_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6579 __m256i __B)
6580{
6581 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6582 /* idx */ ,
6583 (__v8si) __A,
6584 (__v8si) __B,
6585 (__mmask8) __U);
6586}
6587
6588extern __inline __m256i
6589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6590_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6591 __mmask8 __U, __m256i __B)
6592{
6593 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6594 (__v8si) __I
6595 /* idx */ ,
6596 (__v8si) __B,
6597 (__mmask8) __U);
6598}
6599
6600extern __inline __m256i
6601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6602_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6603 __m256i __I, __m256i __B)
6604{
6605 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6606 /* idx */ ,
6607 (__v8si) __A,
6608 (__v8si) __B,
6609 (__mmask8)
6610 __U);
6611}
6612
6613extern __inline __m128d
6614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6615_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6616{
6617 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6618 /* idx */ ,
6619 (__v2df) __A,
6620 (__v2df) __B,
c42b0bdf 6621 (__mmask8) -1);
936c0fe4
AI
6622}
6623
6624extern __inline __m128d
6625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6626_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6627 __m128d __B)
6628{
6629 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6630 /* idx */ ,
6631 (__v2df) __A,
6632 (__v2df) __B,
6633 (__mmask8)
6634 __U);
6635}
6636
6637extern __inline __m128d
6638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6639_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6640 __m128d __B)
6641{
6642 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6643 (__v2di) __I
6644 /* idx */ ,
6645 (__v2df) __B,
6646 (__mmask8)
6647 __U);
6648}
6649
6650extern __inline __m128d
6651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6652_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6653 __m128d __B)
6654{
6655 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6656 /* idx */ ,
6657 (__v2df) __A,
6658 (__v2df) __B,
6659 (__mmask8)
6660 __U);
6661}
6662
6663extern __inline __m128
6664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6665_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6666{
6667 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6668 /* idx */ ,
6669 (__v4sf) __A,
6670 (__v4sf) __B,
6671 (__mmask8) -1);
6672}
6673
6674extern __inline __m128
6675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6676_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6677 __m128 __B)
6678{
6679 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6680 /* idx */ ,
6681 (__v4sf) __A,
6682 (__v4sf) __B,
6683 (__mmask8) __U);
6684}
6685
6686extern __inline __m128
6687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6688_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6689 __m128 __B)
6690{
6691 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6692 (__v4si) __I
6693 /* idx */ ,
6694 (__v4sf) __B,
6695 (__mmask8) __U);
6696}
6697
6698extern __inline __m128
6699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6700_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6701 __m128 __B)
6702{
6703 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6704 /* idx */ ,
6705 (__v4sf) __A,
6706 (__v4sf) __B,
6707 (__mmask8)
6708 __U);
6709}
6710
6711extern __inline __m128i
6712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6713_mm_srav_epi64 (__m128i __X, __m128i __Y)
6714{
6715 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6716 (__v2di) __Y,
6717 (__v2di)
a25a7887 6718 _mm_setzero_si128 (),
936c0fe4
AI
6719 (__mmask8) -1);
6720}
6721
6722extern __inline __m128i
6723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6725 __m128i __Y)
6726{
6727 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6728 (__v2di) __Y,
6729 (__v2di) __W,
6730 (__mmask8) __U);
6731}
6732
6733extern __inline __m128i
6734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6736{
6737 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6738 (__v2di) __Y,
6739 (__v2di)
a25a7887 6740 _mm_setzero_si128 (),
936c0fe4
AI
6741 (__mmask8) __U);
6742}
6743
6744extern __inline __m256i
6745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6747 __m256i __Y)
6748{
6749 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6750 (__v8si) __Y,
6751 (__v8si) __W,
6752 (__mmask8) __U);
6753}
6754
6755extern __inline __m256i
6756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6757_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6758{
6759 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6760 (__v8si) __Y,
6761 (__v8si)
6762 _mm256_setzero_si256 (),
6763 (__mmask8) __U);
6764}
6765
6766extern __inline __m128i
6767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6768_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6769 __m128i __Y)
6770{
6771 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6772 (__v4si) __Y,
6773 (__v4si) __W,
6774 (__mmask8) __U);
6775}
6776
6777extern __inline __m128i
6778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6780{
6781 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6782 (__v4si) __Y,
6783 (__v4si)
6784 _mm_setzero_si128 (),
6785 (__mmask8) __U);
6786}
6787
6788extern __inline __m256i
6789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6791 __m256i __Y)
6792{
6793 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6794 (__v4di) __Y,
6795 (__v4di) __W,
6796 (__mmask8) __U);
6797}
6798
6799extern __inline __m256i
6800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6802{
6803 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6804 (__v4di) __Y,
6805 (__v4di)
6806 _mm256_setzero_si256 (),
6807 (__mmask8) __U);
6808}
6809
6810extern __inline __m128i
6811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6813 __m128i __Y)
6814{
6815 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6816 (__v2di) __Y,
6817 (__v2di) __W,
6818 (__mmask8) __U);
6819}
6820
6821extern __inline __m128i
6822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6823_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6824{
6825 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6826 (__v2di) __Y,
6827 (__v2di)
a25a7887 6828 _mm_setzero_si128 (),
936c0fe4
AI
6829 (__mmask8) __U);
6830}
6831
6832extern __inline __m256i
6833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6835 __m256i __Y)
6836{
6837 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6838 (__v8si) __Y,
6839 (__v8si) __W,
6840 (__mmask8) __U);
6841}
6842
6843extern __inline __m256i
6844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6846{
6847 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6848 (__v8si) __Y,
6849 (__v8si)
6850 _mm256_setzero_si256 (),
6851 (__mmask8) __U);
6852}
6853
6854extern __inline __m128i
6855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6857 __m128i __Y)
6858{
6859 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6860 (__v4si) __Y,
6861 (__v4si) __W,
6862 (__mmask8) __U);
6863}
6864
6865extern __inline __m128i
6866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6868{
6869 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6870 (__v4si) __Y,
6871 (__v4si)
6872 _mm_setzero_si128 (),
6873 (__mmask8) __U);
6874}
6875
6876extern __inline __m256i
6877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6879 __m256i __Y)
6880{
6881 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6882 (__v8si) __Y,
6883 (__v8si) __W,
6884 (__mmask8) __U);
6885}
6886
6887extern __inline __m256i
6888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6890{
6891 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6892 (__v8si) __Y,
6893 (__v8si)
6894 _mm256_setzero_si256 (),
6895 (__mmask8) __U);
6896}
6897
6898extern __inline __m128i
6899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6901 __m128i __Y)
6902{
6903 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6904 (__v4si) __Y,
6905 (__v4si) __W,
6906 (__mmask8) __U);
6907}
6908
6909extern __inline __m128i
6910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6912{
6913 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6914 (__v4si) __Y,
6915 (__v4si)
6916 _mm_setzero_si128 (),
6917 (__mmask8) __U);
6918}
6919
6920extern __inline __m256i
6921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6923 __m256i __Y)
6924{
6925 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6926 (__v4di) __Y,
6927 (__v4di) __W,
6928 (__mmask8) __U);
6929}
6930
6931extern __inline __m256i
6932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6934{
6935 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6936 (__v4di) __Y,
6937 (__v4di)
6938 _mm256_setzero_si256 (),
6939 (__mmask8) __U);
6940}
6941
6942extern __inline __m128i
6943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6945 __m128i __Y)
6946{
6947 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6948 (__v2di) __Y,
6949 (__v2di) __W,
6950 (__mmask8) __U);
6951}
6952
6953extern __inline __m128i
6954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6956{
6957 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6958 (__v2di) __Y,
6959 (__v2di)
a25a7887 6960 _mm_setzero_si128 (),
936c0fe4
AI
6961 (__mmask8) __U);
6962}
6963
6964extern __inline __m256i
6965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966_mm256_rolv_epi32 (__m256i __A, __m256i __B)
6967{
6968 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6969 (__v8si) __B,
6970 (__v8si)
6971 _mm256_setzero_si256 (),
6972 (__mmask8) -1);
6973}
6974
6975extern __inline __m256i
6976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6978 __m256i __B)
6979{
6980 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6981 (__v8si) __B,
6982 (__v8si) __W,
6983 (__mmask8) __U);
6984}
6985
6986extern __inline __m256i
6987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6989{
6990 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6991 (__v8si) __B,
6992 (__v8si)
6993 _mm256_setzero_si256 (),
6994 (__mmask8) __U);
6995}
6996
6997extern __inline __m128i
6998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999_mm_rolv_epi32 (__m128i __A, __m128i __B)
7000{
7001 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7002 (__v4si) __B,
7003 (__v4si)
7004 _mm_setzero_si128 (),
7005 (__mmask8) -1);
7006}
7007
7008extern __inline __m128i
7009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7011 __m128i __B)
7012{
7013 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7014 (__v4si) __B,
7015 (__v4si) __W,
7016 (__mmask8) __U);
7017}
7018
7019extern __inline __m128i
7020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7022{
7023 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7024 (__v4si) __B,
7025 (__v4si)
7026 _mm_setzero_si128 (),
7027 (__mmask8) __U);
7028}
7029
7030extern __inline __m256i
7031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032_mm256_rorv_epi32 (__m256i __A, __m256i __B)
7033{
7034 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7035 (__v8si) __B,
7036 (__v8si)
7037 _mm256_setzero_si256 (),
7038 (__mmask8) -1);
7039}
7040
7041extern __inline __m256i
7042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7044 __m256i __B)
7045{
7046 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7047 (__v8si) __B,
7048 (__v8si) __W,
7049 (__mmask8) __U);
7050}
7051
7052extern __inline __m256i
7053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7055{
7056 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7057 (__v8si) __B,
7058 (__v8si)
7059 _mm256_setzero_si256 (),
7060 (__mmask8) __U);
7061}
7062
7063extern __inline __m128i
7064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065_mm_rorv_epi32 (__m128i __A, __m128i __B)
7066{
7067 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7068 (__v4si) __B,
7069 (__v4si)
7070 _mm_setzero_si128 (),
7071 (__mmask8) -1);
7072}
7073
7074extern __inline __m128i
7075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7077 __m128i __B)
7078{
7079 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7080 (__v4si) __B,
7081 (__v4si) __W,
7082 (__mmask8) __U);
7083}
7084
7085extern __inline __m128i
7086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7088{
7089 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7090 (__v4si) __B,
7091 (__v4si)
7092 _mm_setzero_si128 (),
7093 (__mmask8) __U);
7094}
7095
7096extern __inline __m256i
7097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098_mm256_rolv_epi64 (__m256i __A, __m256i __B)
7099{
7100 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7101 (__v4di) __B,
7102 (__v4di)
7103 _mm256_setzero_si256 (),
7104 (__mmask8) -1);
7105}
7106
7107extern __inline __m256i
7108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7110 __m256i __B)
7111{
7112 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7113 (__v4di) __B,
7114 (__v4di) __W,
7115 (__mmask8) __U);
7116}
7117
7118extern __inline __m256i
7119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7121{
7122 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7123 (__v4di) __B,
7124 (__v4di)
7125 _mm256_setzero_si256 (),
7126 (__mmask8) __U);
7127}
7128
7129extern __inline __m128i
7130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131_mm_rolv_epi64 (__m128i __A, __m128i __B)
7132{
7133 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7134 (__v2di) __B,
7135 (__v2di)
a25a7887 7136 _mm_setzero_si128 (),
936c0fe4
AI
7137 (__mmask8) -1);
7138}
7139
7140extern __inline __m128i
7141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7143 __m128i __B)
7144{
7145 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7146 (__v2di) __B,
7147 (__v2di) __W,
7148 (__mmask8) __U);
7149}
7150
7151extern __inline __m128i
7152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7153_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7154{
7155 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7156 (__v2di) __B,
7157 (__v2di)
a25a7887 7158 _mm_setzero_si128 (),
936c0fe4
AI
7159 (__mmask8) __U);
7160}
7161
7162extern __inline __m256i
7163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164_mm256_rorv_epi64 (__m256i __A, __m256i __B)
7165{
7166 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7167 (__v4di) __B,
7168 (__v4di)
7169 _mm256_setzero_si256 (),
7170 (__mmask8) -1);
7171}
7172
7173extern __inline __m256i
7174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7176 __m256i __B)
7177{
7178 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7179 (__v4di) __B,
7180 (__v4di) __W,
7181 (__mmask8) __U);
7182}
7183
7184extern __inline __m256i
7185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7187{
7188 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7189 (__v4di) __B,
7190 (__v4di)
7191 _mm256_setzero_si256 (),
7192 (__mmask8) __U);
7193}
7194
7195extern __inline __m128i
7196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7197_mm_rorv_epi64 (__m128i __A, __m128i __B)
7198{
7199 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7200 (__v2di) __B,
7201 (__v2di)
a25a7887 7202 _mm_setzero_si128 (),
936c0fe4
AI
7203 (__mmask8) -1);
7204}
7205
7206extern __inline __m128i
7207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7208_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7209 __m128i __B)
7210{
7211 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7212 (__v2di) __B,
7213 (__v2di) __W,
7214 (__mmask8) __U);
7215}
7216
7217extern __inline __m128i
7218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7220{
7221 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7222 (__v2di) __B,
7223 (__v2di)
a25a7887 7224 _mm_setzero_si128 (),
936c0fe4
AI
7225 (__mmask8) __U);
7226}
7227
7228extern __inline __m256i
7229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7230_mm256_srav_epi64 (__m256i __X, __m256i __Y)
7231{
7232 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7233 (__v4di) __Y,
7234 (__v4di)
7235 _mm256_setzero_si256 (),
7236 (__mmask8) -1);
7237}
7238
7239extern __inline __m256i
7240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7241_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7242 __m256i __Y)
7243{
7244 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7245 (__v4di) __Y,
7246 (__v4di) __W,
7247 (__mmask8) __U);
7248}
7249
7250extern __inline __m256i
7251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7252_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7253{
7254 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7255 (__v4di) __Y,
7256 (__v4di)
7257 _mm256_setzero_si256 (),
7258 (__mmask8) __U);
7259}
7260
7261extern __inline __m256i
7262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7263_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7264 __m256i __B)
7265{
7266 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7267 (__v4di) __B,
7268 (__v4di) __W, __U);
7269}
7270
7271extern __inline __m256i
7272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7273_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7274{
7275 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7276 (__v4di) __B,
7277 (__v4di)
7278 _mm256_setzero_pd (),
7279 __U);
7280}
7281
7282extern __inline __m128i
7283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7285 __m128i __B)
7286{
7287 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7288 (__v2di) __B,
7289 (__v2di) __W, __U);
7290}
7291
7292extern __inline __m128i
7293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7294_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7295{
7296 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7297 (__v2di) __B,
7298 (__v2di)
7299 _mm_setzero_pd (),
7300 __U);
7301}
7302
7303extern __inline __m256i
7304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7305_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7306 __m256i __B)
7307{
7308 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7309 (__v4di) __B,
7310 (__v4di) __W, __U);
7311}
7312
7313extern __inline __m256i
7314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7315_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7316{
7317 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7318 (__v4di) __B,
7319 (__v4di)
7320 _mm256_setzero_pd (),
7321 __U);
7322}
7323
7324extern __inline __m128i
7325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7326_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7327 __m128i __B)
7328{
7329 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7330 (__v2di) __B,
7331 (__v2di) __W, __U);
7332}
7333
7334extern __inline __m128i
7335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7337{
7338 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7339 (__v2di) __B,
7340 (__v2di)
7341 _mm_setzero_pd (),
7342 __U);
7343}
7344
7345extern __inline __m256i
7346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7347_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7348 __m256i __B)
7349{
7350 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7351 (__v4di) __B,
7352 (__v4di) __W,
7353 (__mmask8) __U);
7354}
7355
7356extern __inline __m256i
7357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7358_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7359{
7360 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7361 (__v4di) __B,
7362 (__v4di)
7363 _mm256_setzero_si256 (),
7364 (__mmask8) __U);
7365}
7366
01fd9f8d
L
7367extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7368_mm256_or_epi64 (__m256i __A, __m256i __B)
7369{
7370 return (__m256i) ((__v4du)__A | (__v4du)__B);
7371}
7372
936c0fe4
AI
7373extern __inline __m128i
7374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7375_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7376{
7377 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7378 (__v2di) __B,
7379 (__v2di) __W,
7380 (__mmask8) __U);
7381}
7382
7383extern __inline __m128i
7384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7385_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7386{
7387 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7388 (__v2di) __B,
7389 (__v2di)
7390 _mm_setzero_si128 (),
7391 (__mmask8) __U);
7392}
7393
01fd9f8d
L
7394extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7395_mm_or_epi64 (__m128i __A, __m128i __B)
7396{
7397 return (__m128i) ((__v2du)__A | (__v2du)__B);
7398}
7399
936c0fe4
AI
7400extern __inline __m256i
7401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7402_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7403 __m256i __B)
7404{
7405 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7406 (__v4di) __B,
7407 (__v4di) __W,
7408 (__mmask8) __U);
7409}
7410
7411extern __inline __m256i
7412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7413_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7414{
7415 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7416 (__v4di) __B,
7417 (__v4di)
7418 _mm256_setzero_si256 (),
7419 (__mmask8) __U);
7420}
7421
01fd9f8d
L
7422extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7423_mm256_xor_epi64 (__m256i __A, __m256i __B)
7424{
7425 return (__m256i) ((__v4du)__A ^ (__v4du)__B);
7426}
7427
936c0fe4
AI
7428extern __inline __m128i
7429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7430_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7431 __m128i __B)
7432{
7433 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7434 (__v2di) __B,
7435 (__v2di) __W,
7436 (__mmask8) __U);
7437}
7438
7439extern __inline __m128i
7440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7442{
7443 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7444 (__v2di) __B,
7445 (__v2di)
7446 _mm_setzero_si128 (),
7447 (__mmask8) __U);
7448}
7449
01fd9f8d
L
7450extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7451_mm_xor_epi64 (__m128i __A, __m128i __B)
7452{
7453 return (__m128i) ((__v2du)__A ^ (__v2du)__B);
7454}
7455
936c0fe4
AI
7456extern __inline __m256d
7457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7459 __m256d __B)
7460{
7461 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7462 (__v4df) __B,
7463 (__v4df) __W,
7464 (__mmask8) __U);
7465}
7466
7467extern __inline __m256d
7468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7470{
7471 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7472 (__v4df) __B,
7473 (__v4df)
7474 _mm256_setzero_pd (),
7475 (__mmask8) __U);
7476}
7477
7478extern __inline __m256
7479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7480_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7481{
7482 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7483 (__v8sf) __B,
7484 (__v8sf) __W,
7485 (__mmask8) __U);
7486}
7487
7488extern __inline __m256
7489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7491{
7492 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7493 (__v8sf) __B,
7494 (__v8sf)
7495 _mm256_setzero_ps (),
7496 (__mmask8) __U);
7497}
7498
7499extern __inline __m128
7500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7502{
7503 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7504 (__v4sf) __B,
7505 (__v4sf) __W,
7506 (__mmask8) __U);
7507}
7508
7509extern __inline __m128
7510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7511_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7512{
7513 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7514 (__v4sf) __B,
7515 (__v4sf)
7516 _mm_setzero_ps (),
7517 (__mmask8) __U);
7518}
7519
7520extern __inline __m128d
7521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7522_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7523{
7524 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7525 (__v2df) __B,
7526 (__v2df) __W,
7527 (__mmask8) __U);
7528}
7529
7530extern __inline __m128d
7531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7533{
7534 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7535 (__v2df) __B,
7536 (__v2df)
7537 _mm_setzero_pd (),
7538 (__mmask8) __U);
7539}
7540
7541extern __inline __m256d
7542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7543_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7544 __m256d __B)
7545{
7546 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7547 (__v4df) __B,
7548 (__v4df) __W,
7549 (__mmask8) __U);
7550}
7551
7552extern __inline __m256d
7553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7554_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7555 __m256d __B)
7556{
7557 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7558 (__v4df) __B,
7559 (__v4df) __W,
7560 (__mmask8) __U);
7561}
7562
7563extern __inline __m256d
7564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7566{
7567 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7568 (__v4df) __B,
7569 (__v4df)
7570 _mm256_setzero_pd (),
7571 (__mmask8) __U);
7572}
7573
7574extern __inline __m256
7575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7576_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7577{
7578 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7579 (__v8sf) __B,
7580 (__v8sf) __W,
7581 (__mmask8) __U);
7582}
7583
7584extern __inline __m256d
7585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7586_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7587{
7588 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7589 (__v4df) __B,
7590 (__v4df)
7591 _mm256_setzero_pd (),
7592 (__mmask8) __U);
7593}
7594
7595extern __inline __m256
7596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7598{
7599 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7600 (__v8sf) __B,
7601 (__v8sf) __W,
7602 (__mmask8) __U);
7603}
7604
7605extern __inline __m256
7606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7607_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7608{
7609 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7610 (__v8sf) __B,
7611 (__v8sf)
7612 _mm256_setzero_ps (),
7613 (__mmask8) __U);
7614}
7615
7616extern __inline __m256
7617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7618_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7619{
7620 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7621 (__v8sf) __B,
7622 (__v8sf)
7623 _mm256_setzero_ps (),
7624 (__mmask8) __U);
7625}
7626
7627extern __inline __m128
7628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7629_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7630{
7631 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7632 (__v4sf) __B,
7633 (__v4sf) __W,
7634 (__mmask8) __U);
7635}
7636
7637extern __inline __m128
7638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7639_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7640{
7641 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7642 (__v4sf) __B,
7643 (__v4sf) __W,
7644 (__mmask8) __U);
7645}
7646
7647extern __inline __m128
7648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7649_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7650{
7651 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7652 (__v4sf) __B,
7653 (__v4sf)
7654 _mm_setzero_ps (),
7655 (__mmask8) __U);
7656}
7657
7658extern __inline __m128
7659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7660_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7661{
7662 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7663 (__v4sf) __B,
7664 (__v4sf)
7665 _mm_setzero_ps (),
7666 (__mmask8) __U);
7667}
7668
7669extern __inline __m128
7670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7671_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7672{
7673 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7674 (__v4sf) __B,
7675 (__v4sf) __W,
7676 (__mmask8) __U);
7677}
7678
7679extern __inline __m128
7680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7681_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7682{
7683 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7684 (__v4sf) __B,
7685 (__v4sf)
7686 _mm_setzero_ps (),
7687 (__mmask8) __U);
7688}
7689
7690extern __inline __m128d
7691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7692_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7693{
7694 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7695 (__v2df) __B,
7696 (__v2df) __W,
7697 (__mmask8) __U);
7698}
7699
7700extern __inline __m128d
7701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7702_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7703{
7704 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7705 (__v2df) __B,
7706 (__v2df)
7707 _mm_setzero_pd (),
7708 (__mmask8) __U);
7709}
7710
7711extern __inline __m128d
7712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7713_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7714{
7715 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7716 (__v2df) __B,
7717 (__v2df) __W,
7718 (__mmask8) __U);
7719}
7720
7721extern __inline __m128d
7722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7723_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7724{
7725 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7726 (__v2df) __B,
7727 (__v2df)
7728 _mm_setzero_pd (),
7729 (__mmask8) __U);
7730}
7731
7732extern __inline __m128d
7733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7734_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7735{
7736 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7737 (__v2df) __B,
7738 (__v2df) __W,
7739 (__mmask8) __U);
7740}
7741
7742extern __inline __m128d
7743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7745{
7746 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7747 (__v2df) __B,
7748 (__v2df)
7749 _mm_setzero_pd (),
7750 (__mmask8) __U);
7751}
7752
7753extern __inline __m256
7754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7755_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7756{
7757 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7758 (__v8sf) __B,
7759 (__v8sf) __W,
7760 (__mmask8) __U);
7761}
7762
7763extern __inline __m256
7764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7765_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7766{
7767 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7768 (__v8sf) __B,
7769 (__v8sf)
7770 _mm256_setzero_ps (),
7771 (__mmask8) __U);
7772}
7773
7774extern __inline __m256d
7775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7776_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7777 __m256d __B)
7778{
7779 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7780 (__v4df) __B,
7781 (__v4df) __W,
7782 (__mmask8) __U);
7783}
7784
7785extern __inline __m256d
7786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7788{
7789 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7790 (__v4df) __B,
7791 (__v4df)
7792 _mm256_setzero_pd (),
7793 (__mmask8) __U);
7794}
7795
7796extern __inline __m256i
7797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7798_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7799{
7800 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7801 (__v4di) __B,
7802 (__v4di)
7803 _mm256_setzero_si256 (),
7804 __M);
7805}
7806
7807extern __inline __m256i
7808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7809_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7810 __m256i __B)
7811{
7812 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7813 (__v4di) __B,
7814 (__v4di) __W, __M);
7815}
7816
7817extern __inline __m256i
7818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7819_mm256_min_epi64 (__m256i __A, __m256i __B)
7820{
7821 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7822 (__v4di) __B,
7823 (__v4di)
7824 _mm256_setzero_si256 (),
7825 (__mmask8) -1);
7826}
7827
7828extern __inline __m256i
7829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7830_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7831 __m256i __B)
7832{
7833 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7834 (__v4di) __B,
7835 (__v4di) __W, __M);
7836}
7837
7838extern __inline __m256i
7839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7841{
7842 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7843 (__v4di) __B,
7844 (__v4di)
7845 _mm256_setzero_si256 (),
7846 __M);
7847}
7848
7849extern __inline __m256i
7850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7851_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7852{
7853 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7854 (__v4di) __B,
7855 (__v4di)
7856 _mm256_setzero_si256 (),
7857 __M);
7858}
7859
7860extern __inline __m256i
7861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862_mm256_max_epi64 (__m256i __A, __m256i __B)
7863{
7864 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7865 (__v4di) __B,
7866 (__v4di)
7867 _mm256_setzero_si256 (),
7868 (__mmask8) -1);
7869}
7870
7871extern __inline __m256i
7872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7873_mm256_max_epu64 (__m256i __A, __m256i __B)
7874{
7875 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7876 (__v4di) __B,
7877 (__v4di)
7878 _mm256_setzero_si256 (),
7879 (__mmask8) -1);
7880}
7881
7882extern __inline __m256i
7883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7884_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7885 __m256i __B)
7886{
7887 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7888 (__v4di) __B,
7889 (__v4di) __W, __M);
7890}
7891
7892extern __inline __m256i
7893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7894_mm256_min_epu64 (__m256i __A, __m256i __B)
7895{
7896 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7897 (__v4di) __B,
7898 (__v4di)
7899 _mm256_setzero_si256 (),
7900 (__mmask8) -1);
7901}
7902
7903extern __inline __m256i
7904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7905_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7906 __m256i __B)
7907{
7908 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7909 (__v4di) __B,
7910 (__v4di) __W, __M);
7911}
7912
7913extern __inline __m256i
7914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7915_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7916{
7917 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7918 (__v4di) __B,
7919 (__v4di)
7920 _mm256_setzero_si256 (),
7921 __M);
7922}
7923
7924extern __inline __m256i
7925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7926_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7927{
7928 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7929 (__v8si) __B,
7930 (__v8si)
7931 _mm256_setzero_si256 (),
7932 __M);
7933}
7934
7935extern __inline __m256i
7936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7937_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7938 __m256i __B)
7939{
7940 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7941 (__v8si) __B,
7942 (__v8si) __W, __M);
7943}
7944
7945extern __inline __m256i
7946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7947_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7948{
7949 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7950 (__v8si) __B,
7951 (__v8si)
7952 _mm256_setzero_si256 (),
7953 __M);
7954}
7955
7956extern __inline __m256i
7957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7958_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7959 __m256i __B)
7960{
7961 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7962 (__v8si) __B,
7963 (__v8si) __W, __M);
7964}
7965
7966extern __inline __m256i
7967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7968_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7969{
7970 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7971 (__v8si) __B,
7972 (__v8si)
7973 _mm256_setzero_si256 (),
7974 __M);
7975}
7976
7977extern __inline __m256i
7978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7979_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7980 __m256i __B)
7981{
7982 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7983 (__v8si) __B,
7984 (__v8si) __W, __M);
7985}
7986
7987extern __inline __m256i
7988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7989_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7990{
7991 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7992 (__v8si) __B,
7993 (__v8si)
7994 _mm256_setzero_si256 (),
7995 __M);
7996}
7997
7998extern __inline __m256i
7999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8000_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8001 __m256i __B)
8002{
8003 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8004 (__v8si) __B,
8005 (__v8si) __W, __M);
8006}
8007
8008extern __inline __m128i
8009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8010_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8011{
8012 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8013 (__v2di) __B,
8014 (__v2di)
8015 _mm_setzero_si128 (),
8016 __M);
8017}
8018
8019extern __inline __m128i
8020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8021_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8022 __m128i __B)
8023{
8024 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8025 (__v2di) __B,
8026 (__v2di) __W, __M);
8027}
8028
8029extern __inline __m128i
8030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8031_mm_min_epi64 (__m128i __A, __m128i __B)
8032{
8033 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8034 (__v2di) __B,
8035 (__v2di)
a25a7887 8036 _mm_setzero_si128 (),
936c0fe4
AI
8037 (__mmask8) -1);
8038}
8039
8040extern __inline __m128i
8041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8042_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8043 __m128i __B)
8044{
8045 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8046 (__v2di) __B,
8047 (__v2di) __W, __M);
8048}
8049
8050extern __inline __m128i
8051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8052_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8053{
8054 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8055 (__v2di) __B,
8056 (__v2di)
8057 _mm_setzero_si128 (),
8058 __M);
8059}
8060
8061extern __inline __m128i
8062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8063_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8064{
8065 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8066 (__v2di) __B,
8067 (__v2di)
8068 _mm_setzero_si128 (),
8069 __M);
8070}
8071
8072extern __inline __m128i
8073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8074_mm_max_epi64 (__m128i __A, __m128i __B)
8075{
8076 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8077 (__v2di) __B,
8078 (__v2di)
a25a7887 8079 _mm_setzero_si128 (),
936c0fe4
AI
8080 (__mmask8) -1);
8081}
8082
8083extern __inline __m128i
8084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8085_mm_max_epu64 (__m128i __A, __m128i __B)
8086{
8087 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8088 (__v2di) __B,
8089 (__v2di)
a25a7887 8090 _mm_setzero_si128 (),
936c0fe4
AI
8091 (__mmask8) -1);
8092}
8093
8094extern __inline __m128i
8095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8096_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8097 __m128i __B)
8098{
8099 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8100 (__v2di) __B,
8101 (__v2di) __W, __M);
8102}
8103
8104extern __inline __m128i
8105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8106_mm_min_epu64 (__m128i __A, __m128i __B)
8107{
8108 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8109 (__v2di) __B,
8110 (__v2di)
a25a7887 8111 _mm_setzero_si128 (),
936c0fe4
AI
8112 (__mmask8) -1);
8113}
8114
8115extern __inline __m128i
8116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8117_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8118 __m128i __B)
8119{
8120 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8121 (__v2di) __B,
8122 (__v2di) __W, __M);
8123}
8124
8125extern __inline __m128i
8126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8127_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8128{
8129 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8130 (__v2di) __B,
8131 (__v2di)
8132 _mm_setzero_si128 (),
8133 __M);
8134}
8135
8136extern __inline __m128i
8137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8138_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8139{
8140 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8141 (__v4si) __B,
8142 (__v4si)
8143 _mm_setzero_si128 (),
8144 __M);
8145}
8146
8147extern __inline __m128i
8148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8149_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8150 __m128i __B)
8151{
8152 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8153 (__v4si) __B,
8154 (__v4si) __W, __M);
8155}
8156
8157extern __inline __m128i
8158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8159_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8160{
8161 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8162 (__v4si) __B,
8163 (__v4si)
8164 _mm_setzero_si128 (),
8165 __M);
8166}
8167
8168extern __inline __m128i
8169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8170_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8171 __m128i __B)
8172{
8173 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8174 (__v4si) __B,
8175 (__v4si) __W, __M);
8176}
8177
8178extern __inline __m128i
8179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8181{
8182 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8183 (__v4si) __B,
8184 (__v4si)
8185 _mm_setzero_si128 (),
8186 __M);
8187}
8188
8189extern __inline __m128i
8190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8191_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8192 __m128i __B)
8193{
8194 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8195 (__v4si) __B,
8196 (__v4si) __W, __M);
8197}
8198
8199extern __inline __m128i
8200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8201_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8202{
8203 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8204 (__v4si) __B,
8205 (__v4si)
8206 _mm_setzero_si128 (),
8207 __M);
8208}
8209
8210extern __inline __m128i
8211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8212_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8213 __m128i __B)
8214{
8215 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8216 (__v4si) __B,
8217 (__v4si) __W, __M);
8218}
8219
8220#ifndef __AVX512CD__
8221#pragma GCC push_options
8222#pragma GCC target("avx512vl,avx512cd")
8223#define __DISABLE_AVX512VLCD__
8224#endif
8225
8226extern __inline __m128i
8227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228_mm_broadcastmb_epi64 (__mmask8 __A)
8229{
8230 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8231}
8232
8233extern __inline __m256i
8234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8235_mm256_broadcastmb_epi64 (__mmask8 __A)
8236{
8237 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8238}
8239
8240extern __inline __m128i
8241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8242_mm_broadcastmw_epi32 (__mmask16 __A)
8243{
8244 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8245}
8246
8247extern __inline __m256i
8248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8249_mm256_broadcastmw_epi32 (__mmask16 __A)
8250{
8251 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8252}
8253
8254extern __inline __m256i
8255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8256_mm256_lzcnt_epi32 (__m256i __A)
8257{
8258 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8259 (__v8si)
8260 _mm256_setzero_si256 (),
8261 (__mmask8) -1);
8262}
8263
8264extern __inline __m256i
8265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8266_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8267{
8268 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8269 (__v8si) __W,
8270 (__mmask8) __U);
8271}
8272
8273extern __inline __m256i
8274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8275_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8276{
8277 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8278 (__v8si)
8279 _mm256_setzero_si256 (),
8280 (__mmask8) __U);
8281}
8282
8283extern __inline __m256i
8284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8285_mm256_lzcnt_epi64 (__m256i __A)
8286{
8287 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8288 (__v4di)
8289 _mm256_setzero_si256 (),
8290 (__mmask8) -1);
8291}
8292
8293extern __inline __m256i
8294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8295_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8296{
8297 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8298 (__v4di) __W,
8299 (__mmask8) __U);
8300}
8301
8302extern __inline __m256i
8303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8304_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8305{
8306 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8307 (__v4di)
8308 _mm256_setzero_si256 (),
8309 (__mmask8) __U);
8310}
8311
8312extern __inline __m256i
8313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314_mm256_conflict_epi64 (__m256i __A)
8315{
8316 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8317 (__v4di)
8318 _mm256_setzero_si256 (),
c42b0bdf 8319 (__mmask8) -1);
936c0fe4
AI
8320}
8321
8322extern __inline __m256i
8323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8324_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8325{
8326 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8327 (__v4di) __W,
8328 (__mmask8)
8329 __U);
8330}
8331
8332extern __inline __m256i
8333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8334_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8335{
8336 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8337 (__v4di)
8338 _mm256_setzero_si256 (),
8339 (__mmask8)
8340 __U);
8341}
8342
8343extern __inline __m256i
8344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8345_mm256_conflict_epi32 (__m256i __A)
8346{
8347 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8348 (__v8si)
8349 _mm256_setzero_si256 (),
c42b0bdf 8350 (__mmask8) -1);
936c0fe4
AI
8351}
8352
8353extern __inline __m256i
8354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8355_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8356{
8357 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8358 (__v8si) __W,
8359 (__mmask8)
8360 __U);
8361}
8362
8363extern __inline __m256i
8364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8366{
8367 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8368 (__v8si)
8369 _mm256_setzero_si256 (),
8370 (__mmask8)
8371 __U);
8372}
8373
8374extern __inline __m128i
8375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8376_mm_lzcnt_epi32 (__m128i __A)
8377{
8378 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8379 (__v4si)
8380 _mm_setzero_si128 (),
8381 (__mmask8) -1);
8382}
8383
8384extern __inline __m128i
8385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8386_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8387{
8388 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8389 (__v4si) __W,
8390 (__mmask8) __U);
8391}
8392
8393extern __inline __m128i
8394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8395_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8396{
8397 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8398 (__v4si)
8399 _mm_setzero_si128 (),
8400 (__mmask8) __U);
8401}
8402
8403extern __inline __m128i
8404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8405_mm_lzcnt_epi64 (__m128i __A)
8406{
8407 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8408 (__v2di)
a25a7887 8409 _mm_setzero_si128 (),
936c0fe4
AI
8410 (__mmask8) -1);
8411}
8412
8413extern __inline __m128i
8414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8415_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8416{
8417 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8418 (__v2di) __W,
8419 (__mmask8) __U);
8420}
8421
8422extern __inline __m128i
8423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8424_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8425{
8426 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8427 (__v2di)
a25a7887 8428 _mm_setzero_si128 (),
936c0fe4
AI
8429 (__mmask8) __U);
8430}
8431
8432extern __inline __m128i
8433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8434_mm_conflict_epi64 (__m128i __A)
8435{
8436 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8437 (__v2di)
a25a7887 8438 _mm_setzero_si128 (),
c42b0bdf 8439 (__mmask8) -1);
936c0fe4
AI
8440}
8441
8442extern __inline __m128i
8443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8444_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8445{
8446 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8447 (__v2di) __W,
8448 (__mmask8)
8449 __U);
8450}
8451
8452extern __inline __m128i
8453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8454_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8455{
8456 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8457 (__v2di)
a25a7887 8458 _mm_setzero_si128 (),
936c0fe4
AI
8459 (__mmask8)
8460 __U);
8461}
8462
8463extern __inline __m128i
8464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8465_mm_conflict_epi32 (__m128i __A)
8466{
8467 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8468 (__v4si)
8469 _mm_setzero_si128 (),
c42b0bdf 8470 (__mmask8) -1);
936c0fe4
AI
8471}
8472
8473extern __inline __m128i
8474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8475_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8476{
8477 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8478 (__v4si) __W,
8479 (__mmask8)
8480 __U);
8481}
8482
8483extern __inline __m128i
8484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8485_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8486{
8487 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8488 (__v4si)
8489 _mm_setzero_si128 (),
8490 (__mmask8)
8491 __U);
8492}
8493
/* If the AVX512CD target options were pushed for this region (marker macro
   defined), drop the marker so it does not leak into code that follows and
   restore the previously saved target options.  */
#ifdef __DISABLE_AVX512VLCD__
#undef __DISABLE_AVX512VLCD__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
8497
8498extern __inline __m256d
8499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8500_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8501 __m256d __B)
8502{
8503 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8504 (__v4df) __B,
8505 (__v4df) __W,
8506 (__mmask8) __U);
8507}
8508
8509extern __inline __m256d
8510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8511_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8512{
8513 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8514 (__v4df) __B,
8515 (__v4df)
8516 _mm256_setzero_pd (),
8517 (__mmask8) __U);
8518}
8519
8520extern __inline __m128d
8521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8522_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8523 __m128d __B)
8524{
8525 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8526 (__v2df) __B,
8527 (__v2df) __W,
8528 (__mmask8) __U);
8529}
8530
8531extern __inline __m128d
8532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8533_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8534{
8535 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8536 (__v2df) __B,
8537 (__v2df)
8538 _mm_setzero_pd (),
8539 (__mmask8) __U);
8540}
8541
8542extern __inline __m256
8543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8545 __m256 __B)
8546{
8547 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8548 (__v8sf) __B,
8549 (__v8sf) __W,
8550 (__mmask8) __U);
8551}
8552
8553extern __inline __m256d
8554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8556 __m256d __B)
8557{
8558 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8559 (__v4df) __B,
8560 (__v4df) __W,
8561 (__mmask8) __U);
8562}
8563
8564extern __inline __m256d
8565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8567{
8568 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8569 (__v4df) __B,
8570 (__v4df)
8571 _mm256_setzero_pd (),
8572 (__mmask8) __U);
8573}
8574
8575extern __inline __m128d
8576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8577_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8578 __m128d __B)
8579{
8580 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8581 (__v2df) __B,
8582 (__v2df) __W,
8583 (__mmask8) __U);
8584}
8585
8586extern __inline __m128d
8587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8588_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8589{
8590 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8591 (__v2df) __B,
8592 (__v2df)
8593 _mm_setzero_pd (),
8594 (__mmask8) __U);
8595}
8596
8597extern __inline __m256
8598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8599_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8600 __m256 __B)
8601{
8602 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8603 (__v8sf) __B,
8604 (__v8sf) __W,
8605 (__mmask8) __U);
8606}
8607
8608extern __inline __m256
8609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8610_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8611{
8612 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8613 (__v8sf) __B,
8614 (__v8sf)
8615 _mm256_setzero_ps (),
8616 (__mmask8) __U);
8617}
8618
8619extern __inline __m128
8620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8621_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8622{
8623 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8624 (__v4sf) __B,
8625 (__v4sf) __W,
8626 (__mmask8) __U);
8627}
8628
8629extern __inline __m128
8630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8631_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8632{
8633 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8634 (__v4sf) __B,
8635 (__v4sf)
8636 _mm_setzero_ps (),
8637 (__mmask8) __U);
8638}
8639
8640extern __inline __m128
8641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8643{
8644 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8645 (__v4sf) __W,
8646 (__mmask8) __U);
8647}
8648
8649extern __inline __m128
8650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8652{
8653 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8654 (__v4sf)
8655 _mm_setzero_ps (),
8656 (__mmask8) __U);
8657}
8658
8659extern __inline __m256
8660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8661_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8662{
8663 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8664 (__v8sf) __B,
8665 (__v8sf)
8666 _mm256_setzero_ps (),
8667 (__mmask8) __U);
8668}
8669
8670extern __inline __m256
8671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8673{
8674 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8675 (__v8sf) __W,
8676 (__mmask8) __U);
8677}
8678
8679extern __inline __m256
8680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8682{
8683 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8684 (__v8sf)
8685 _mm256_setzero_ps (),
8686 (__mmask8) __U);
8687}
8688
8689extern __inline __m128
8690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8691_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8692{
8693 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8694 (__v4sf) __B,
8695 (__v4sf) __W,
8696 (__mmask8) __U);
8697}
8698
8699extern __inline __m128
8700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8701_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8702{
8703 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8704 (__v4sf) __B,
8705 (__v4sf)
8706 _mm_setzero_ps (),
8707 (__mmask8) __U);
8708}
8709
8710extern __inline __m256i
8711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8713 __m128i __B)
8714{
8715 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8716 (__v4si) __B,
8717 (__v8si) __W,
8718 (__mmask8) __U);
8719}
8720
8721extern __inline __m256i
8722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8723_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8724{
8725 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8726 (__v4si) __B,
8727 (__v8si)
8728 _mm256_setzero_si256 (),
8729 (__mmask8) __U);
8730}
8731
8732extern __inline __m128i
8733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8734_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8735 __m128i __B)
8736{
8737 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8738 (__v4si) __B,
8739 (__v4si) __W,
8740 (__mmask8) __U);
8741}
8742
8743extern __inline __m128i
8744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8745_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8746{
8747 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8748 (__v4si) __B,
8749 (__v4si)
8750 _mm_setzero_si128 (),
8751 (__mmask8) __U);
8752}
8753
8754extern __inline __m256i
8755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8756_mm256_sra_epi64 (__m256i __A, __m128i __B)
8757{
8758 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8759 (__v2di) __B,
8760 (__v4di)
8761 _mm256_setzero_si256 (),
8762 (__mmask8) -1);
8763}
8764
8765extern __inline __m256i
8766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8767_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8768 __m128i __B)
8769{
8770 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8771 (__v2di) __B,
8772 (__v4di) __W,
8773 (__mmask8) __U);
8774}
8775
8776extern __inline __m256i
8777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8778_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8779{
8780 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8781 (__v2di) __B,
8782 (__v4di)
8783 _mm256_setzero_si256 (),
8784 (__mmask8) __U);
8785}
8786
8787extern __inline __m128i
8788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8789_mm_sra_epi64 (__m128i __A, __m128i __B)
8790{
8791 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8792 (__v2di) __B,
8793 (__v2di)
a25a7887 8794 _mm_setzero_si128 (),
936c0fe4
AI
8795 (__mmask8) -1);
8796}
8797
8798extern __inline __m128i
8799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8800_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8801 __m128i __B)
8802{
8803 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8804 (__v2di) __B,
8805 (__v2di) __W,
8806 (__mmask8) __U);
8807}
8808
8809extern __inline __m128i
8810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8812{
8813 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8814 (__v2di) __B,
8815 (__v2di)
a25a7887 8816 _mm_setzero_si128 (),
936c0fe4
AI
8817 (__mmask8) __U);
8818}
8819
8820extern __inline __m128i
8821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8822_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8823 __m128i __B)
8824{
8825 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8826 (__v4si) __B,
8827 (__v4si) __W,
8828 (__mmask8) __U);
8829}
8830
8831extern __inline __m128i
8832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8833_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8834{
8835 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8836 (__v4si) __B,
8837 (__v4si)
8838 _mm_setzero_si128 (),
8839 (__mmask8) __U);
8840}
8841
8842extern __inline __m128i
8843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8844_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8845 __m128i __B)
8846{
8847 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8848 (__v2di) __B,
8849 (__v2di) __W,
8850 (__mmask8) __U);
8851}
8852
8853extern __inline __m128i
8854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8855_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8856{
8857 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8858 (__v2di) __B,
8859 (__v2di)
a25a7887 8860 _mm_setzero_si128 (),
936c0fe4
AI
8861 (__mmask8) __U);
8862}
8863
8864extern __inline __m256i
8865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8866_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8867 __m128i __B)
8868{
8869 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8870 (__v4si) __B,
8871 (__v8si) __W,
8872 (__mmask8) __U);
8873}
8874
8875extern __inline __m256i
8876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8877_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8878{
8879 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8880 (__v4si) __B,
8881 (__v8si)
8882 _mm256_setzero_si256 (),
8883 (__mmask8) __U);
8884}
8885
8886extern __inline __m256i
8887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8888_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8889 __m128i __B)
8890{
8891 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8892 (__v2di) __B,
8893 (__v4di) __W,
8894 (__mmask8) __U);
8895}
8896
8897extern __inline __m256i
8898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8899_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8900{
8901 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8902 (__v2di) __B,
8903 (__v4di)
8904 _mm256_setzero_si256 (),
8905 (__mmask8) __U);
8906}
8907
8908extern __inline __m256
8909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8911 __m256 __Y)
8912{
8913 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8914 (__v8si) __X,
8915 (__v8sf) __W,
8916 (__mmask8) __U);
8917}
8918
8919extern __inline __m256
8920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8921_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8922{
8923 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8924 (__v8si) __X,
8925 (__v8sf)
8926 _mm256_setzero_ps (),
8927 (__mmask8) __U);
8928}
8929
8930extern __inline __m256d
8931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8932_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8933{
8934 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8935 (__v4di) __X,
8936 (__v4df)
8937 _mm256_setzero_pd (),
8938 (__mmask8) -1);
8939}
8940
8941extern __inline __m256d
8942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8943_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8944 __m256d __Y)
8945{
8946 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8947 (__v4di) __X,
8948 (__v4df) __W,
8949 (__mmask8) __U);
8950}
8951
8952extern __inline __m256d
8953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8954_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8955{
8956 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8957 (__v4di) __X,
8958 (__v4df)
8959 _mm256_setzero_pd (),
8960 (__mmask8) __U);
8961}
8962
8963extern __inline __m256d
8964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8965_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8966 __m256i __C)
8967{
8968 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8969 (__v4di) __C,
8970 (__v4df) __W,
8971 (__mmask8)
8972 __U);
8973}
8974
8975extern __inline __m256d
8976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8977_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8978{
8979 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8980 (__v4di) __C,
8981 (__v4df)
8982 _mm256_setzero_pd (),
8983 (__mmask8)
8984 __U);
8985}
8986
8987extern __inline __m256
8988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8989_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8990 __m256i __C)
8991{
8992 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8993 (__v8si) __C,
8994 (__v8sf) __W,
8995 (__mmask8) __U);
8996}
8997
8998extern __inline __m256
8999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9000_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
9001{
9002 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9003 (__v8si) __C,
9004 (__v8sf)
9005 _mm256_setzero_ps (),
9006 (__mmask8) __U);
9007}
9008
9009extern __inline __m128d
9010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9011_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
9012 __m128i __C)
9013{
9014 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9015 (__v2di) __C,
9016 (__v2df) __W,
9017 (__mmask8) __U);
9018}
9019
9020extern __inline __m128d
9021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9022_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
9023{
9024 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9025 (__v2di) __C,
9026 (__v2df)
9027 _mm_setzero_pd (),
9028 (__mmask8) __U);
9029}
9030
9031extern __inline __m128
9032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9033_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
9034 __m128i __C)
9035{
9036 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9037 (__v4si) __C,
9038 (__v4sf) __W,
9039 (__mmask8) __U);
9040}
9041
9042extern __inline __m128
9043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9044_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
9045{
9046 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9047 (__v4si) __C,
9048 (__v4sf)
9049 _mm_setzero_ps (),
9050 (__mmask8) __U);
9051}
9052
9053extern __inline __m256i
9054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9056{
9057 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9058 (__v8si) __B,
9059 (__v8si)
9060 _mm256_setzero_si256 (),
9061 __M);
9062}
9063
9064extern __inline __m256i
9065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9066_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9067{
9068 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9069 (__v4di) __X,
9070 (__v4di)
9071 _mm256_setzero_si256 (),
9072 __M);
9073}
9074
9075extern __inline __m256i
9076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9077_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9078 __m256i __B)
9079{
9080 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9081 (__v8si) __B,
9082 (__v8si) __W, __M);
9083}
9084
9085extern __inline __m128i
9086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9087_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9088{
9089 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9090 (__v4si) __B,
9091 (__v4si)
9092 _mm_setzero_si128 (),
9093 __M);
9094}
9095
9096extern __inline __m128i
9097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9098_mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
9099 __m128i __B)
9100{
9101 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9102 (__v4si) __B,
9103 (__v4si) __W, __M);
9104}
9105
9106extern __inline __m256i
9107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9109 __m256i __Y)
9110{
9111 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9112 (__v8si) __Y,
9113 (__v4di) __W, __M);
9114}
9115
9116extern __inline __m256i
9117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9119{
9120 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9121 (__v8si) __Y,
9122 (__v4di)
9123 _mm256_setzero_si256 (),
9124 __M);
9125}
9126
9127extern __inline __m128i
9128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9130 __m128i __Y)
9131{
9132 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9133 (__v4si) __Y,
9134 (__v2di) __W, __M);
9135}
9136
9137extern __inline __m128i
9138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9139_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9140{
9141 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9142 (__v4si) __Y,
9143 (__v2di)
9144 _mm_setzero_si128 (),
9145 __M);
9146}
9147
395a191d
SP
9148extern __inline __m256i
9149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9150_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
9151{
9152 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9153 (__v4di) __X,
9154 (__v4di)
9155 _mm256_setzero_si256 (),
9156 (__mmask8) -1);
9157}
9158
936c0fe4
AI
9159extern __inline __m256i
9160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9162 __m256i __Y)
9163{
9164 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9165 (__v4di) __X,
9166 (__v4di) __W,
9167 __M);
9168}
9169
9170extern __inline __m256i
9171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9172_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9173 __m256i __Y)
9174{
9175 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9176 (__v8si) __Y,
9177 (__v4di) __W, __M);
9178}
9179
9180extern __inline __m256i
9181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9182_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9183{
9184 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9185 (__v8si) __X,
9186 (__v8si)
9187 _mm256_setzero_si256 (),
9188 __M);
9189}
9190
9191extern __inline __m256i
9192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9193_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9194{
9195 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9196 (__v8si) __Y,
9197 (__v4di)
9198 _mm256_setzero_si256 (),
9199 __M);
9200}
9201
9202extern __inline __m128i
9203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9204_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9205 __m128i __Y)
9206{
9207 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9208 (__v4si) __Y,
9209 (__v2di) __W, __M);
9210}
9211
9212extern __inline __m128i
9213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9214_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9215{
9216 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9217 (__v4si) __Y,
9218 (__v2di)
9219 _mm_setzero_si128 (),
9220 __M);
9221}
9222
395a191d
SP
9223extern __inline __m256i
9224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9225_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
9226{
9227 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9228 (__v8si) __X,
9229 (__v8si)
9230 _mm256_setzero_si256 (),
9231 (__mmask8) -1);
9232}
9233
936c0fe4
AI
9234extern __inline __m256i
9235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9237 __m256i __Y)
9238{
9239 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9240 (__v8si) __X,
9241 (__v8si) __W,
9242 __M);
9243}
9244
6b62f323
JJ
9245extern __inline __mmask8
9246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9247_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9248{
6b62f323
JJ
9249 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9250 (__v8si) __Y, 4,
936c0fe4
AI
9251 (__mmask8) __M);
9252}
9253
6b62f323
JJ
9254extern __inline __mmask8
9255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9256_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9257{
6b62f323
JJ
9258 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9259 (__v8si) __Y, 4,
9260 (__mmask8) -1);
936c0fe4
AI
9261}
9262
6b62f323
JJ
9263extern __inline __mmask8
9264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9265_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9266{
6b62f323
JJ
9267 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9268 (__v8si) __Y, 1,
9269 (__mmask8) __M);
936c0fe4
AI
9270}
9271
6b62f323
JJ
9272extern __inline __mmask8
9273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9274_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9275{
6b62f323
JJ
9276 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9277 (__v8si) __Y, 1,
9278 (__mmask8) -1);
936c0fe4
AI
9279}
9280
6b62f323
JJ
9281extern __inline __mmask8
9282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9284{
6b62f323
JJ
9285 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9286 (__v8si) __Y, 5,
9287 (__mmask8) __M);
936c0fe4
AI
9288}
9289
6b62f323
JJ
9290extern __inline __mmask8
9291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9292_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9293{
6b62f323
JJ
9294 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9295 (__v8si) __Y, 5,
9296 (__mmask8) -1);
936c0fe4
AI
9297}
9298
6b62f323
JJ
9299extern __inline __mmask8
9300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9301_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9302{
6b62f323
JJ
9303 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9304 (__v8si) __Y, 2,
9305 (__mmask8) __M);
936c0fe4
AI
9306}
9307
6b62f323
JJ
9308extern __inline __mmask8
9309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9310_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9311{
6b62f323
JJ
9312 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9313 (__v8si) __Y, 2,
9314 (__mmask8) -1);
936c0fe4
AI
9315}
9316
6b62f323
JJ
9317extern __inline __mmask8
9318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9319_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9320{
6b62f323
JJ
9321 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9322 (__v4di) __Y, 4,
9323 (__mmask8) __M);
936c0fe4
AI
9324}
9325
6b62f323
JJ
9326extern __inline __mmask8
9327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9328_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9329{
6b62f323
JJ
9330 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9331 (__v4di) __Y, 4,
9332 (__mmask8) -1);
936c0fe4
AI
9333}
9334
6b62f323
JJ
9335extern __inline __mmask8
9336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9337_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9338{
6b62f323
JJ
9339 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9340 (__v4di) __Y, 1,
9341 (__mmask8) __M);
936c0fe4
AI
9342}
9343
6b62f323
JJ
9344extern __inline __mmask8
9345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9346_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9347{
6b62f323
JJ
9348 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9349 (__v4di) __Y, 1,
9350 (__mmask8) -1);
936c0fe4
AI
9351}
9352
6b62f323
JJ
9353extern __inline __mmask8
9354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9355_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9356{
6b62f323
JJ
9357 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9358 (__v4di) __Y, 5,
9359 (__mmask8) __M);
936c0fe4
AI
9360}
9361
6b62f323
JJ
9362extern __inline __mmask8
9363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9364_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9365{
6b62f323
JJ
9366 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9367 (__v4di) __Y, 5,
9368 (__mmask8) -1);
936c0fe4
AI
9369}
9370
6b62f323
JJ
9371extern __inline __mmask8
9372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9373_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9374{
6b62f323
JJ
9375 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9376 (__v4di) __Y, 2,
9377 (__mmask8) __M);
936c0fe4
AI
9378}
9379
6b62f323
JJ
9380extern __inline __mmask8
9381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9382_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9383{
6b62f323
JJ
9384 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9385 (__v4di) __Y, 2,
9386 (__mmask8) -1);
936c0fe4
AI
9387}
9388
6b62f323
JJ
9389extern __inline __mmask8
9390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9391_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9392{
6b62f323
JJ
9393 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9394 (__v8si) __Y, 4,
9395 (__mmask8) __M);
936c0fe4
AI
9396}
9397
6b62f323
JJ
9398extern __inline __mmask8
9399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9400_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
9401{
9402 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9403 (__v8si) __Y, 4,
9404 (__mmask8) -1);
936c0fe4
AI
9405}
9406
6b62f323
JJ
9407extern __inline __mmask8
9408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9409_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9410{
6b62f323
JJ
9411 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9412 (__v8si) __Y, 1,
9413 (__mmask8) __M);
936c0fe4
AI
9414}
9415
6b62f323
JJ
9416extern __inline __mmask8
9417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9418_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9419{
6b62f323
JJ
9420 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9421 (__v8si) __Y, 1,
9422 (__mmask8) -1);
936c0fe4
AI
9423}
9424
6b62f323
JJ
9425extern __inline __mmask8
9426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9427_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9428{
6b62f323
JJ
9429 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9430 (__v8si) __Y, 5,
9431 (__mmask8) __M);
936c0fe4
AI
9432}
9433
6b62f323
JJ
9434extern __inline __mmask8
9435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9436_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9437{
6b62f323
JJ
9438 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9439 (__v8si) __Y, 5,
9440 (__mmask8) -1);
936c0fe4
AI
9441}
9442
6b62f323
JJ
9443extern __inline __mmask8
9444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9445_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9446{
6b62f323
JJ
9447 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9448 (__v8si) __Y, 2,
9449 (__mmask8) __M);
936c0fe4
AI
9450}
9451
6b62f323
JJ
9452extern __inline __mmask8
9453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9454_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9455{
6b62f323
JJ
9456 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9457 (__v8si) __Y, 2,
9458 (__mmask8) -1);
936c0fe4
AI
9459}
9460
6b62f323
JJ
9461extern __inline __mmask8
9462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9463_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9464{
6b62f323
JJ
9465 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9466 (__v4di) __Y, 4,
9467 (__mmask8) __M);
936c0fe4
AI
9468}
9469
6b62f323
JJ
9470extern __inline __mmask8
9471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9472_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9473{
6b62f323
JJ
9474 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9475 (__v4di) __Y, 4,
9476 (__mmask8) -1);
936c0fe4
AI
9477}
9478
6b62f323
JJ
9479extern __inline __mmask8
9480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9481_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9482{
6b62f323
JJ
9483 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9484 (__v4di) __Y, 1,
9485 (__mmask8) __M);
936c0fe4
AI
9486}
9487
6b62f323
JJ
9488extern __inline __mmask8
9489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9490_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9491{
6b62f323
JJ
9492 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9493 (__v4di) __Y, 1,
9494 (__mmask8) -1);
936c0fe4
AI
9495}
9496
6b62f323
JJ
9497extern __inline __mmask8
9498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9499_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9500{
6b62f323
JJ
9501 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9502 (__v4di) __Y, 5,
9503 (__mmask8) __M);
936c0fe4
AI
9504}
9505
6b62f323
JJ
9506extern __inline __mmask8
9507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9508_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9509{
6b62f323
JJ
9510 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9511 (__v4di) __Y, 5,
9512 (__mmask8) -1);
936c0fe4
AI
9513}
9514
6b62f323
JJ
9515extern __inline __mmask8
9516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9517_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9518{
6b62f323
JJ
9519 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9520 (__v4di) __Y, 2,
9521 (__mmask8) __M);
936c0fe4
AI
9522}
9523
6b62f323
JJ
9524extern __inline __mmask8
9525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9526_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9527{
6b62f323
JJ
9528 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9529 (__v4di) __Y, 2,
9530 (__mmask8) -1);
936c0fe4
AI
9531}
9532
6b62f323
JJ
9533extern __inline __mmask8
9534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9535_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9536{
6b62f323
JJ
9537 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9538 (__v4si) __Y, 4,
9539 (__mmask8) __M);
936c0fe4
AI
9540}
9541
6b62f323
JJ
9542extern __inline __mmask8
9543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9544_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9545{
6b62f323
JJ
9546 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9547 (__v4si) __Y, 4,
9548 (__mmask8) -1);
936c0fe4
AI
9549}
9550
6b62f323
JJ
9551extern __inline __mmask8
9552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9553_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9554{
6b62f323
JJ
9555 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9556 (__v4si) __Y, 1,
9557 (__mmask8) __M);
936c0fe4
AI
9558}
9559
6b62f323
JJ
9560extern __inline __mmask8
9561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9562_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9563{
6b62f323
JJ
9564 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9565 (__v4si) __Y, 1,
9566 (__mmask8) -1);
936c0fe4
AI
9567}
9568
6b62f323
JJ
9569extern __inline __mmask8
9570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9571_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9572{
6b62f323
JJ
9573 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9574 (__v4si) __Y, 5,
9575 (__mmask8) __M);
936c0fe4
AI
9576}
9577
6b62f323
JJ
9578extern __inline __mmask8
9579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9580_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9581{
6b62f323
JJ
9582 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9583 (__v4si) __Y, 5,
9584 (__mmask8) -1);
936c0fe4
AI
9585}
9586
6b62f323
JJ
9587extern __inline __mmask8
9588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9589_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9590{
6b62f323
JJ
9591 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9592 (__v4si) __Y, 2,
9593 (__mmask8) __M);
936c0fe4
AI
9594}
9595
6b62f323
JJ
9596extern __inline __mmask8
9597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9599{
6b62f323
JJ
9600 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9601 (__v4si) __Y, 2,
9602 (__mmask8) -1);
936c0fe4
AI
9603}
9604
6b62f323
JJ
9605extern __inline __mmask8
9606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9607_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9608{
6b62f323
JJ
9609 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9610 (__v2di) __Y, 4,
9611 (__mmask8) __M);
936c0fe4
AI
9612}
9613
6b62f323
JJ
9614extern __inline __mmask8
9615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9616_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9617{
6b62f323
JJ
9618 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9619 (__v2di) __Y, 4,
9620 (__mmask8) -1);
936c0fe4
AI
9621}
9622
6b62f323
JJ
9623extern __inline __mmask8
9624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9625_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9626{
6b62f323
JJ
9627 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9628 (__v2di) __Y, 1,
9629 (__mmask8) __M);
936c0fe4
AI
9630}
9631
6b62f323
JJ
9632extern __inline __mmask8
9633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9634_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9635{
6b62f323
JJ
9636 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9637 (__v2di) __Y, 1,
9638 (__mmask8) -1);
936c0fe4
AI
9639}
9640
6b62f323
JJ
9641extern __inline __mmask8
9642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9643_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9644{
6b62f323
JJ
9645 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9646 (__v2di) __Y, 5,
9647 (__mmask8) __M);
936c0fe4
AI
9648}
9649
6b62f323
JJ
9650extern __inline __mmask8
9651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9652_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9653{
6b62f323
JJ
9654 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9655 (__v2di) __Y, 5,
9656 (__mmask8) -1);
936c0fe4
AI
9657}
9658
6b62f323
JJ
9659extern __inline __mmask8
9660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9661_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9662{
6b62f323
JJ
9663 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9664 (__v2di) __Y, 2,
9665 (__mmask8) __M);
936c0fe4
AI
9666}
9667
6b62f323
JJ
9668extern __inline __mmask8
9669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9670_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9671{
6b62f323
JJ
9672 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9673 (__v2di) __Y, 2,
9674 (__mmask8) -1);
936c0fe4
AI
9675}
9676
6b62f323
JJ
9677extern __inline __mmask8
9678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9679_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9680{
6b62f323
JJ
9681 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9682 (__v4si) __Y, 4,
9683 (__mmask8) __M);
936c0fe4
AI
9684}
9685
6b62f323
JJ
9686extern __inline __mmask8
9687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9688_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
9689{
9690 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9691 (__v4si) __Y, 4,
9692 (__mmask8) -1);
9693}
9694
9695extern __inline __mmask8
9696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9697_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9698{
9699 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9700 (__v4si) __Y, 1,
9701 (__mmask8) __M);
9702}
9703
9704extern __inline __mmask8
9705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9706_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9707{
6b62f323
JJ
9708 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9709 (__v4si) __Y, 1,
9710 (__mmask8) -1);
936c0fe4
AI
9711}
9712
6b62f323
JJ
9713extern __inline __mmask8
9714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9715_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9716{
6b62f323
JJ
9717 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9718 (__v4si) __Y, 5,
9719 (__mmask8) __M);
936c0fe4
AI
9720}
9721
6b62f323
JJ
9722extern __inline __mmask8
9723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9724_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9725{
6b62f323
JJ
9726 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9727 (__v4si) __Y, 5,
9728 (__mmask8) -1);
936c0fe4
AI
9729}
9730
6b62f323
JJ
9731extern __inline __mmask8
9732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9733_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9734{
6b62f323
JJ
9735 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9736 (__v4si) __Y, 2,
9737 (__mmask8) __M);
936c0fe4
AI
9738}
9739
6b62f323
JJ
9740extern __inline __mmask8
9741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9742_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9743{
6b62f323
JJ
9744 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9745 (__v4si) __Y, 2,
9746 (__mmask8) -1);
936c0fe4
AI
9747}
9748
6b62f323
JJ
9749extern __inline __mmask8
9750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9751_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9752{
6b62f323
JJ
9753 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9754 (__v2di) __Y, 4,
9755 (__mmask8) __M);
936c0fe4
AI
9756}
9757
6b62f323
JJ
9758extern __inline __mmask8
9759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9760_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9761{
6b62f323
JJ
9762 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9763 (__v2di) __Y, 4,
9764 (__mmask8) -1);
936c0fe4
AI
9765}
9766
6b62f323
JJ
9767extern __inline __mmask8
9768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9770{
6b62f323
JJ
9771 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9772 (__v2di) __Y, 1,
9773 (__mmask8) __M);
936c0fe4
AI
9774}
9775
6b62f323
JJ
9776extern __inline __mmask8
9777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9779{
6b62f323
JJ
9780 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9781 (__v2di) __Y, 1,
9782 (__mmask8) -1);
936c0fe4
AI
9783}
9784
6b62f323
JJ
9785extern __inline __mmask8
9786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9787_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9788{
6b62f323
JJ
9789 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9790 (__v2di) __Y, 5,
9791 (__mmask8) __M);
936c0fe4
AI
9792}
9793
6b62f323
JJ
9794extern __inline __mmask8
9795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9797{
6b62f323
JJ
9798 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9799 (__v2di) __Y, 5,
9800 (__mmask8) -1);
936c0fe4
AI
9801}
9802
6b62f323
JJ
9803extern __inline __mmask8
9804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9806{
6b62f323
JJ
9807 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9808 (__v2di) __Y, 2,
9809 (__mmask8) __M);
936c0fe4
AI
9810}
9811
6b62f323
JJ
9812extern __inline __mmask8
9813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9815{
6b62f323
JJ
9816 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9817 (__v2di) __Y, 2,
9818 (__mmask8) -1);
936c0fe4
AI
9819}
9820
6b62f323 9821#ifdef __OPTIMIZE__
395a191d
SP
9822extern __inline __m256i
9823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824_mm256_permutex_epi64 (__m256i __X, const int __I)
9825{
9826 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9827 __I,
9828 (__v4di)
9829 _mm256_setzero_si256(),
9830 (__mmask8) -1);
9831}
9832
6b62f323 9833extern __inline __m256i
936c0fe4 9834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9835_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9836 __m256i __X, const int __I)
936c0fe4 9837{
6b62f323
JJ
9838 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9839 __I,
9840 (__v4di) __W,
9841 (__mmask8) __M);
936c0fe4
AI
9842}
9843
6b62f323 9844extern __inline __m256i
936c0fe4 9845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9846_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
936c0fe4 9847{
6b62f323
JJ
9848 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9849 __I,
9850 (__v4di)
9851 _mm256_setzero_si256 (),
9852 (__mmask8) __M);
936c0fe4
AI
9853}
9854
6b62f323 9855extern __inline __m256d
936c0fe4 9856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9857_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9858 __m256d __B, const int __imm)
936c0fe4 9859{
6b62f323
JJ
9860 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9861 (__v4df) __B, __imm,
9862 (__v4df) __W,
9863 (__mmask8) __U);
936c0fe4
AI
9864}
9865
6b62f323 9866extern __inline __m256d
936c0fe4 9867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9868_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9869 const int __imm)
936c0fe4 9870{
6b62f323
JJ
9871 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9872 (__v4df) __B, __imm,
9873 (__v4df)
9874 _mm256_setzero_pd (),
9875 (__mmask8) __U);
936c0fe4
AI
9876}
9877
6b62f323 9878extern __inline __m128d
936c0fe4 9879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9880_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9881 __m128d __B, const int __imm)
936c0fe4 9882{
6b62f323
JJ
9883 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9884 (__v2df) __B, __imm,
9885 (__v2df) __W,
9886 (__mmask8) __U);
936c0fe4
AI
9887}
9888
6b62f323 9889extern __inline __m128d
936c0fe4 9890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9891_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9892 const int __imm)
936c0fe4 9893{
6b62f323
JJ
9894 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9895 (__v2df) __B, __imm,
9896 (__v2df)
9897 _mm_setzero_pd (),
9898 (__mmask8) __U);
936c0fe4
AI
9899}
9900
9901extern __inline __m256
9902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9903_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9904 __m256 __B, const int __imm)
936c0fe4 9905{
6b62f323
JJ
9906 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9907 (__v8sf) __B, __imm,
9908 (__v8sf) __W,
9909 (__mmask8) __U);
936c0fe4
AI
9910}
9911
6b62f323 9912extern __inline __m256
936c0fe4 9913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9914_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9915 const int __imm)
936c0fe4 9916{
6b62f323
JJ
9917 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9918 (__v8sf) __B, __imm,
9919 (__v8sf)
9920 _mm256_setzero_ps (),
9921 (__mmask8) __U);
936c0fe4
AI
9922}
9923
6b62f323 9924extern __inline __m128
936c0fe4 9925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9926_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9927 const int __imm)
936c0fe4 9928{
6b62f323
JJ
9929 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9930 (__v4sf) __B, __imm,
9931 (__v4sf) __W,
9932 (__mmask8) __U);
936c0fe4
AI
9933}
9934
6b62f323 9935extern __inline __m128
936c0fe4 9936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9937_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9938 const int __imm)
936c0fe4 9939{
6b62f323
JJ
9940 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9941 (__v4sf) __B, __imm,
9942 (__v4sf)
9943 _mm_setzero_ps (),
9944 (__mmask8) __U);
936c0fe4
AI
9945}
9946
6b62f323 9947extern __inline __m256i
936c0fe4 9948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9949_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
936c0fe4 9950{
6b62f323
JJ
9951 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9952 (__v4si) __B,
9953 __imm,
9954 (__v8si)
9955 _mm256_setzero_si256 (),
9956 (__mmask8) -1);
936c0fe4
AI
9957}
9958
6b62f323 9959extern __inline __m256i
936c0fe4 9960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9961_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9962 __m128i __B, const int __imm)
936c0fe4 9963{
6b62f323
JJ
9964 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9965 (__v4si) __B,
9966 __imm,
9967 (__v8si) __W,
9968 (__mmask8)
9969 __U);
936c0fe4
AI
9970}
9971
6b62f323 9972extern __inline __m256i
936c0fe4 9973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9974_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9975 const int __imm)
936c0fe4 9976{
6b62f323
JJ
9977 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9978 (__v4si) __B,
9979 __imm,
9980 (__v8si)
9981 _mm256_setzero_si256 (),
9982 (__mmask8)
9983 __U);
936c0fe4
AI
9984}
9985
6b62f323 9986extern __inline __m256
936c0fe4 9987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9988_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
936c0fe4 9989{
6b62f323
JJ
9990 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9991 (__v4sf) __B,
936c0fe4 9992 __imm,
6b62f323
JJ
9993 (__v8sf)
9994 _mm256_setzero_ps (),
936c0fe4
AI
9995 (__mmask8) -1);
9996}
9997
6b62f323 9998extern __inline __m256
936c0fe4 9999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10000_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10001 __m128 __B, const int __imm)
936c0fe4 10002{
6b62f323
JJ
10003 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10004 (__v4sf) __B,
936c0fe4 10005 __imm,
6b62f323 10006 (__v8sf) __W,
936c0fe4
AI
10007 (__mmask8) __U);
10008}
10009
6b62f323 10010extern __inline __m256
936c0fe4 10011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10012_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
10013 const int __imm)
936c0fe4 10014{
6b62f323
JJ
10015 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10016 (__v4sf) __B,
936c0fe4 10017 __imm,
6b62f323
JJ
10018 (__v8sf)
10019 _mm256_setzero_ps (),
936c0fe4
AI
10020 (__mmask8) __U);
10021}
10022
6b62f323 10023extern __inline __m128i
936c0fe4 10024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10025_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
936c0fe4 10026{
6b62f323
JJ
10027 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10028 __imm,
10029 (__v4si)
10030 _mm_setzero_si128 (),
10031 (__mmask8) -1);
936c0fe4
AI
10032}
10033
6b62f323 10034extern __inline __m128i
936c0fe4 10035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10036_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
10037 const int __imm)
936c0fe4 10038{
6b62f323
JJ
10039 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10040 __imm,
10041 (__v4si) __W,
10042 (__mmask8)
10043 __U);
936c0fe4
AI
10044}
10045
6b62f323 10046extern __inline __m128i
936c0fe4 10047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10048_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
10049 const int __imm)
936c0fe4 10050{
6b62f323
JJ
10051 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10052 __imm,
10053 (__v4si)
10054 _mm_setzero_si128 (),
10055 (__mmask8)
10056 __U);
936c0fe4
AI
10057}
10058
10059extern __inline __m128
10060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10061_mm256_extractf32x4_ps (__m256 __A, const int __imm)
936c0fe4 10062{
6b62f323
JJ
10063 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10064 __imm,
10065 (__v4sf)
10066 _mm_setzero_ps (),
10067 (__mmask8) -1);
936c0fe4
AI
10068}
10069
10070extern __inline __m128
10071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10072_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
10073 const int __imm)
936c0fe4 10074{
6b62f323
JJ
10075 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10076 __imm,
10077 (__v4sf) __W,
10078 (__mmask8)
10079 __U);
936c0fe4
AI
10080}
10081
10082extern __inline __m128
10083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10084_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
10085 const int __imm)
10086{
10087 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10088 __imm,
10089 (__v4sf)
10090 _mm_setzero_ps (),
10091 (__mmask8)
10092 __U);
10093}
10094
10095extern __inline __m256i
10096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10097_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
10098{
10099 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10100 (__v4di) __B,
10101 __imm,
10102 (__v4di)
10103 _mm256_setzero_si256 (),
10104 (__mmask8) -1);
10105}
10106
10107extern __inline __m256i
10108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10109_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
10110 __m256i __B, const int __imm)
936c0fe4 10111{
6b62f323
JJ
10112 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10113 (__v4di) __B,
10114 __imm,
10115 (__v4di) __W,
10116 (__mmask8) __U);
936c0fe4
AI
10117}
10118
6b62f323 10119extern __inline __m256i
936c0fe4 10120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10121_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
10122 const int __imm)
936c0fe4 10123{
6b62f323
JJ
10124 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10125 (__v4di) __B,
10126 __imm,
10127 (__v4di)
10128 _mm256_setzero_si256 (),
10129 (__mmask8) __U);
936c0fe4
AI
10130}
10131
6b62f323 10132extern __inline __m256i
936c0fe4 10133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10134_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 10135{
6b62f323
JJ
10136 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10137 (__v8si) __B,
10138 __imm,
10139 (__v8si)
10140 _mm256_setzero_si256 (),
10141 (__mmask8) -1);
936c0fe4
AI
10142}
10143
6b62f323 10144extern __inline __m256i
936c0fe4 10145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10146_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
10147 __m256i __B, const int __imm)
936c0fe4 10148{
6b62f323
JJ
10149 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10150 (__v8si) __B,
10151 __imm,
10152 (__v8si) __W,
10153 (__mmask8) __U);
936c0fe4
AI
10154}
10155
6b62f323 10156extern __inline __m256i
936c0fe4 10157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10158_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
10159 const int __imm)
936c0fe4 10160{
6b62f323
JJ
10161 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10162 (__v8si) __B,
10163 __imm,
10164 (__v8si)
10165 _mm256_setzero_si256 (),
10166 (__mmask8) __U);
936c0fe4
AI
10167}
10168
6b62f323 10169extern __inline __m256d
936c0fe4 10170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10171_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
936c0fe4 10172{
6b62f323
JJ
10173 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10174 (__v4df) __B,
10175 __imm,
10176 (__v4df)
10177 _mm256_setzero_pd (),
10178 (__mmask8) -1);
936c0fe4
AI
10179}
10180
6b62f323 10181extern __inline __m256d
936c0fe4 10182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10183_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
10184 __m256d __B, const int __imm)
936c0fe4 10185{
6b62f323
JJ
10186 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10187 (__v4df) __B,
10188 __imm,
10189 (__v4df) __W,
10190 (__mmask8) __U);
936c0fe4
AI
10191}
10192
6b62f323 10193extern __inline __m256d
936c0fe4 10194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10195_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
10196 const int __imm)
936c0fe4 10197{
6b62f323
JJ
10198 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10199 (__v4df) __B,
10200 __imm,
10201 (__v4df)
10202 _mm256_setzero_pd (),
10203 (__mmask8) __U);
936c0fe4
AI
10204}
10205
6b62f323 10206extern __inline __m256
936c0fe4 10207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10208_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
936c0fe4 10209{
6b62f323
JJ
10210 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10211 (__v8sf) __B,
10212 __imm,
10213 (__v8sf)
10214 _mm256_setzero_ps (),
10215 (__mmask8) -1);
936c0fe4
AI
10216}
10217
6b62f323 10218extern __inline __m256
936c0fe4 10219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10220_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10221 __m256 __B, const int __imm)
936c0fe4 10222{
6b62f323
JJ
10223 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10224 (__v8sf) __B,
10225 __imm,
10226 (__v8sf) __W,
10227 (__mmask8) __U);
936c0fe4
AI
10228}
10229
6b62f323 10230extern __inline __m256
936c0fe4 10231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10232_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
10233 const int __imm)
936c0fe4 10234{
6b62f323
JJ
10235 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10236 (__v8sf) __B,
10237 __imm,
10238 (__v8sf)
10239 _mm256_setzero_ps (),
10240 (__mmask8) __U);
936c0fe4
AI
10241}
10242
6b62f323 10243extern __inline __m256d
936c0fe4 10244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10245_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
10246 const int __imm)
936c0fe4 10247{
6b62f323
JJ
10248 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10249 (__v4df) __B,
10250 (__v4di) __C,
10251 __imm,
10252 (__mmask8) -1);
936c0fe4
AI
10253}
10254
6b62f323 10255extern __inline __m256d
936c0fe4 10256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10257_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
10258 __m256i __C, const int __imm)
936c0fe4 10259{
6b62f323
JJ
10260 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10261 (__v4df) __B,
10262 (__v4di) __C,
10263 __imm,
10264 (__mmask8) __U);
936c0fe4
AI
10265}
10266
10267extern __inline __m256d
10268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10269_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
10270 __m256i __C, const int __imm)
936c0fe4 10271{
6b62f323
JJ
10272 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
10273 (__v4df) __B,
10274 (__v4di) __C,
10275 __imm,
10276 (__mmask8) __U);
936c0fe4
AI
10277}
10278
6b62f323 10279extern __inline __m256
936c0fe4 10280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10281_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
10282 const int __imm)
936c0fe4 10283{
6b62f323
JJ
10284 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10285 (__v8sf) __B,
10286 (__v8si) __C,
10287 __imm,
10288 (__mmask8) -1);
936c0fe4
AI
10289}
10290
6b62f323 10291extern __inline __m256
936c0fe4 10292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10293_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
10294 __m256i __C, const int __imm)
936c0fe4 10295{
6b62f323
JJ
10296 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10297 (__v8sf) __B,
10298 (__v8si) __C,
10299 __imm,
10300 (__mmask8) __U);
936c0fe4
AI
10301}
10302
6b62f323 10303extern __inline __m256
936c0fe4 10304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10305_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
10306 __m256i __C, const int __imm)
936c0fe4 10307{
6b62f323
JJ
10308 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
10309 (__v8sf) __B,
10310 (__v8si) __C,
10311 __imm,
10312 (__mmask8) __U);
936c0fe4
AI
10313}
10314
6b62f323 10315extern __inline __m128d
936c0fe4 10316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10317_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
10318 const int __imm)
936c0fe4 10319{
6b62f323
JJ
10320 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10321 (__v2df) __B,
10322 (__v2di) __C,
10323 __imm,
10324 (__mmask8) -1);
936c0fe4
AI
10325}
10326
6b62f323 10327extern __inline __m128d
936c0fe4 10328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10329_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
10330 __m128i __C, const int __imm)
936c0fe4 10331{
6b62f323
JJ
10332 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10333 (__v2df) __B,
10334 (__v2di) __C,
10335 __imm,
10336 (__mmask8) __U);
936c0fe4
AI
10337}
10338
6b62f323 10339extern __inline __m128d
936c0fe4 10340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10341_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
10342 __m128i __C, const int __imm)
936c0fe4 10343{
6b62f323
JJ
10344 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
10345 (__v2df) __B,
10346 (__v2di) __C,
10347 __imm,
10348 (__mmask8) __U);
936c0fe4
AI
10349}
10350
6b62f323 10351extern __inline __m128
936c0fe4 10352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10353_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
936c0fe4 10354{
6b62f323
JJ
10355 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10356 (__v4sf) __B,
10357 (__v4si) __C,
10358 __imm,
10359 (__mmask8) -1);
936c0fe4
AI
10360}
10361
6b62f323 10362extern __inline __m128
936c0fe4 10363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10364_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
10365 __m128i __C, const int __imm)
936c0fe4 10366{
6b62f323
JJ
10367 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10368 (__v4sf) __B,
10369 (__v4si) __C,
10370 __imm,
10371 (__mmask8) __U);
936c0fe4
AI
10372}
10373
6b62f323 10374extern __inline __m128
936c0fe4 10375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10376_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
10377 __m128i __C, const int __imm)
936c0fe4 10378{
6b62f323
JJ
10379 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
10380 (__v4sf) __B,
10381 (__v4si) __C,
10382 __imm,
10383 (__mmask8) __U);
936c0fe4
AI
10384}
10385
6b62f323 10386extern __inline __m256i
936c0fe4 10387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10388_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10389 const int __imm)
936c0fe4 10390{
6b62f323
JJ
10391 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10392 (__v8si) __W,
10393 (__mmask8) __U);
936c0fe4
AI
10394}
10395
6b62f323 10396extern __inline __m256i
936c0fe4 10397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10398_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
936c0fe4 10399{
6b62f323
JJ
10400 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10401 (__v8si)
10402 _mm256_setzero_si256 (),
10403 (__mmask8) __U);
936c0fe4
AI
10404}
10405
6b62f323 10406extern __inline __m128i
936c0fe4 10407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10408_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10409 const int __imm)
936c0fe4 10410{
6b62f323
JJ
10411 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10412 (__v4si) __W,
10413 (__mmask8) __U);
936c0fe4
AI
10414}
10415
6b62f323 10416extern __inline __m128i
936c0fe4 10417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10418_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
936c0fe4 10419{
6b62f323
JJ
10420 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10421 (__v4si)
10422 _mm_setzero_si128 (),
10423 (__mmask8) __U);
936c0fe4
AI
10424}
10425
6b62f323 10426extern __inline __m256i
936c0fe4 10427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10428_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10429 const int __imm)
936c0fe4 10430{
6b62f323
JJ
10431 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10432 (__v4di) __W,
10433 (__mmask8) __U);
936c0fe4
AI
10434}
10435
6b62f323 10436extern __inline __m256i
936c0fe4 10437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10438_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
936c0fe4 10439{
6b62f323
JJ
10440 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10441 (__v4di)
10442 _mm256_setzero_si256 (),
10443 (__mmask8) __U);
936c0fe4
AI
10444}
10445
6b62f323 10446extern __inline __m128i
936c0fe4 10447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10448_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10449 const int __imm)
936c0fe4 10450{
6b62f323
JJ
10451 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10452 (__v2di) __W,
10453 (__mmask8) __U);
936c0fe4
AI
10454}
10455
6b62f323 10456extern __inline __m128i
936c0fe4 10457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10458_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
936c0fe4 10459{
6b62f323
JJ
10460 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10461 (__v2di)
10462 _mm_setzero_si128 (),
10463 (__mmask8) __U);
936c0fe4
AI
10464}
10465
6b62f323 10466extern __inline __m256i
936c0fe4 10467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10468_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
10469 const int __imm)
936c0fe4 10470{
6b62f323
JJ
10471 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10472 (__v4di) __B,
10473 (__v4di) __C, __imm,
10474 (__mmask8) -1);
936c0fe4
AI
10475}
10476
6b62f323 10477extern __inline __m256i
936c0fe4 10478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10479_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
10480 __m256i __B, __m256i __C,
10481 const int __imm)
936c0fe4 10482{
6b62f323
JJ
10483 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10484 (__v4di) __B,
10485 (__v4di) __C, __imm,
10486 (__mmask8) __U);
936c0fe4
AI
10487}
10488
6b62f323 10489extern __inline __m256i
936c0fe4 10490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10491_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
10492 __m256i __B, __m256i __C,
10493 const int __imm)
936c0fe4 10494{
6b62f323
JJ
10495 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
10496 (__v4di) __B,
10497 (__v4di) __C,
10498 __imm,
10499 (__mmask8) __U);
936c0fe4
AI
10500}
10501
6b62f323 10502extern __inline __m256i
936c0fe4 10503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10504_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
10505 const int __imm)
936c0fe4 10506{
6b62f323
JJ
10507 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10508 (__v8si) __B,
10509 (__v8si) __C, __imm,
10510 (__mmask8) -1);
936c0fe4
AI
10511}
10512
6b62f323 10513extern __inline __m256i
936c0fe4 10514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10515_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
10516 __m256i __B, __m256i __C,
10517 const int __imm)
936c0fe4 10518{
6b62f323
JJ
10519 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10520 (__v8si) __B,
10521 (__v8si) __C, __imm,
10522 (__mmask8) __U);
936c0fe4
AI
10523}
10524
6b62f323 10525extern __inline __m256i
936c0fe4 10526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10527_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
10528 __m256i __B, __m256i __C,
10529 const int __imm)
936c0fe4 10530{
6b62f323
JJ
10531 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
10532 (__v8si) __B,
10533 (__v8si) __C,
10534 __imm,
10535 (__mmask8) __U);
936c0fe4
AI
10536}
10537
6b62f323 10538extern __inline __m128i
936c0fe4 10539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10540_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
10541 const int __imm)
936c0fe4 10542{
6b62f323
JJ
10543 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10544 (__v2di) __B,
10545 (__v2di) __C, __imm,
10546 (__mmask8) -1);
936c0fe4
AI
10547}
10548
6b62f323 10549extern __inline __m128i
936c0fe4 10550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10551_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
10552 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10553{
6b62f323
JJ
10554 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10555 (__v2di) __B,
10556 (__v2di) __C, __imm,
10557 (__mmask8) __U);
936c0fe4
AI
10558}
10559
6b62f323 10560extern __inline __m128i
936c0fe4 10561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10562_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
10563 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10564{
6b62f323
JJ
10565 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
10566 (__v2di) __B,
10567 (__v2di) __C,
10568 __imm,
10569 (__mmask8) __U);
936c0fe4
AI
10570}
10571
6b62f323 10572extern __inline __m128i
936c0fe4 10573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10574_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
10575 const int __imm)
936c0fe4 10576{
6b62f323
JJ
10577 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10578 (__v4si) __B,
10579 (__v4si) __C, __imm,
10580 (__mmask8) -1);
936c0fe4
AI
10581}
10582
6b62f323 10583extern __inline __m128i
936c0fe4 10584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10585_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
10586 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10587{
6b62f323
JJ
10588 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10589 (__v4si) __B,
10590 (__v4si) __C, __imm,
10591 (__mmask8) __U);
936c0fe4
AI
10592}
10593
6b62f323 10594extern __inline __m128i
936c0fe4 10595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10596_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
10597 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10598{
6b62f323
JJ
10599 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
10600 (__v4si) __B,
10601 (__v4si) __C,
10602 __imm,
10603 (__mmask8) __U);
936c0fe4
AI
10604}
10605
6b62f323 10606extern __inline __m256
936c0fe4 10607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10608_mm256_roundscale_ps (__m256 __A, const int __imm)
936c0fe4 10609{
6b62f323
JJ
10610 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10611 __imm,
10612 (__v8sf)
10613 _mm256_setzero_ps (),
10614 (__mmask8) -1);
936c0fe4
AI
10615}
10616
6b62f323 10617extern __inline __m256
936c0fe4 10618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10619_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
10620 const int __imm)
936c0fe4 10621{
6b62f323
JJ
10622 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10623 __imm,
10624 (__v8sf) __W,
10625 (__mmask8) __U);
936c0fe4
AI
10626}
10627
6b62f323 10628extern __inline __m256
936c0fe4 10629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10630_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
936c0fe4 10631{
6b62f323
JJ
10632 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10633 __imm,
10634 (__v8sf)
10635 _mm256_setzero_ps (),
10636 (__mmask8) __U);
936c0fe4
AI
10637}
10638
6b62f323 10639extern __inline __m256d
936c0fe4 10640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10641_mm256_roundscale_pd (__m256d __A, const int __imm)
936c0fe4 10642{
6b62f323
JJ
10643 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10644 __imm,
10645 (__v4df)
10646 _mm256_setzero_pd (),
10647 (__mmask8) -1);
936c0fe4
AI
10648}
10649
6b62f323 10650extern __inline __m256d
936c0fe4 10651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10652_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
10653 const int __imm)
936c0fe4 10654{
6b62f323
JJ
10655 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10656 __imm,
10657 (__v4df) __W,
10658 (__mmask8) __U);
936c0fe4
AI
10659}
10660
6b62f323 10661extern __inline __m256d
936c0fe4 10662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10663_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
936c0fe4 10664{
6b62f323
JJ
10665 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10666 __imm,
10667 (__v4df)
10668 _mm256_setzero_pd (),
10669 (__mmask8) __U);
936c0fe4
AI
10670}
10671
6b62f323 10672extern __inline __m128
936c0fe4 10673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10674_mm_roundscale_ps (__m128 __A, const int __imm)
936c0fe4 10675{
6b62f323
JJ
10676 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10677 __imm,
10678 (__v4sf)
10679 _mm_setzero_ps (),
10680 (__mmask8) -1);
936c0fe4
AI
10681}
10682
6b62f323 10683extern __inline __m128
936c0fe4 10684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10685_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10686 const int __imm)
936c0fe4 10687{
6b62f323
JJ
10688 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10689 __imm,
10690 (__v4sf) __W,
10691 (__mmask8) __U);
936c0fe4
AI
10692}
10693
6b62f323 10694extern __inline __m128
936c0fe4 10695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10696_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
936c0fe4 10697{
6b62f323
JJ
10698 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10699 __imm,
10700 (__v4sf)
10701 _mm_setzero_ps (),
10702 (__mmask8) __U);
936c0fe4
AI
10703}
10704
6b62f323 10705extern __inline __m128d
936c0fe4 10706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10707_mm_roundscale_pd (__m128d __A, const int __imm)
936c0fe4 10708{
6b62f323
JJ
10709 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10710 __imm,
10711 (__v2df)
10712 _mm_setzero_pd (),
10713 (__mmask8) -1);
936c0fe4
AI
10714}
10715
6b62f323 10716extern __inline __m128d
936c0fe4 10717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10718_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10719 const int __imm)
936c0fe4 10720{
6b62f323
JJ
10721 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10722 __imm,
10723 (__v2df) __W,
10724 (__mmask8) __U);
936c0fe4
AI
10725}
10726
6b62f323 10727extern __inline __m128d
936c0fe4 10728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10729_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
936c0fe4 10730{
6b62f323
JJ
10731 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10732 __imm,
10733 (__v2df)
10734 _mm_setzero_pd (),
10735 (__mmask8) __U);
936c0fe4
AI
10736}
10737
6b62f323 10738extern __inline __m256
936c0fe4 10739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10740_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10741 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10742{
6b62f323
JJ
10743 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10744 (__C << 2) | __B,
10745 (__v8sf)
10746 _mm256_setzero_ps (),
10747 (__mmask8) -1);
936c0fe4
AI
10748}
10749
6b62f323 10750extern __inline __m256
936c0fe4 10751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10752_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10753 _MM_MANTISSA_NORM_ENUM __B,
10754 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10755{
6b62f323
JJ
10756 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10757 (__C << 2) | __B,
10758 (__v8sf) __W,
10759 (__mmask8) __U);
936c0fe4
AI
10760}
10761
6b62f323 10762extern __inline __m256
936c0fe4 10763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10764_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10765 _MM_MANTISSA_NORM_ENUM __B,
10766 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10767{
6b62f323
JJ
10768 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10769 (__C << 2) | __B,
10770 (__v8sf)
10771 _mm256_setzero_ps (),
10772 (__mmask8) __U);
936c0fe4
AI
10773}
10774
6b62f323 10775extern __inline __m128
936c0fe4 10776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10777_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10778 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10779{
6b62f323
JJ
10780 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10781 (__C << 2) | __B,
10782 (__v4sf)
10783 _mm_setzero_ps (),
10784 (__mmask8) -1);
936c0fe4
AI
10785}
10786
6b62f323 10787extern __inline __m128
936c0fe4 10788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10789_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10790 _MM_MANTISSA_NORM_ENUM __B,
10791 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10792{
6b62f323
JJ
10793 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10794 (__C << 2) | __B,
10795 (__v4sf) __W,
10796 (__mmask8) __U);
936c0fe4
AI
10797}
10798
6b62f323 10799extern __inline __m128
936c0fe4 10800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10801_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10802 _MM_MANTISSA_NORM_ENUM __B,
10803 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10804{
6b62f323
JJ
10805 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10806 (__C << 2) | __B,
10807 (__v4sf)
10808 _mm_setzero_ps (),
10809 (__mmask8) __U);
936c0fe4
AI
10810}
10811
6b62f323 10812extern __inline __m256d
936c0fe4 10813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10814_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10815 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10816{
6b62f323
JJ
10817 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10818 (__C << 2) | __B,
10819 (__v4df)
10820 _mm256_setzero_pd (),
10821 (__mmask8) -1);
936c0fe4
AI
10822}
10823
6b62f323 10824extern __inline __m256d
936c0fe4 10825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10826_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10827 _MM_MANTISSA_NORM_ENUM __B,
10828 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10829{
6b62f323
JJ
10830 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10831 (__C << 2) | __B,
10832 (__v4df) __W,
10833 (__mmask8) __U);
936c0fe4
AI
10834}
10835
6b62f323 10836extern __inline __m256d
936c0fe4 10837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10838_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10839 _MM_MANTISSA_NORM_ENUM __B,
10840 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10841{
6b62f323
JJ
10842 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10843 (__C << 2) | __B,
10844 (__v4df)
10845 _mm256_setzero_pd (),
10846 (__mmask8) __U);
936c0fe4
AI
10847}
10848
6b62f323 10849extern __inline __m128d
936c0fe4 10850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10851_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10852 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10853{
6b62f323
JJ
10854 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10855 (__C << 2) | __B,
10856 (__v2df)
10857 _mm_setzero_pd (),
10858 (__mmask8) -1);
936c0fe4
AI
10859}
10860
6b62f323 10861extern __inline __m128d
936c0fe4 10862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10863_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10864 _MM_MANTISSA_NORM_ENUM __B,
10865 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10866{
6b62f323
JJ
10867 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10868 (__C << 2) | __B,
10869 (__v2df) __W,
10870 (__mmask8) __U);
936c0fe4
AI
10871}
10872
6b62f323 10873extern __inline __m128d
936c0fe4 10874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10875_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10876 _MM_MANTISSA_NORM_ENUM __B,
10877 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10878{
6b62f323
JJ
10879 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10880 (__C << 2) | __B,
10881 (__v2df)
10882 _mm_setzero_pd (),
10883 (__mmask8) __U);
936c0fe4
AI
10884}
10885
6b62f323 10886extern __inline __m256
936c0fe4 10887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10888_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10889 __m256i __index, void const *__addr,
10890 int __scale)
936c0fe4 10891{
6b62f323
JJ
10892 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10893 __addr,
10894 (__v8si) __index,
10895 __mask, __scale);
936c0fe4
AI
10896}
10897
6b62f323 10898extern __inline __m128
936c0fe4 10899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10900_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10901 __m128i __index, void const *__addr,
10902 int __scale)
936c0fe4 10903{
6b62f323
JJ
10904 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10905 __addr,
10906 (__v4si) __index,
10907 __mask, __scale);
936c0fe4
AI
10908}
10909
6b62f323 10910extern __inline __m256d
936c0fe4 10911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10912_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10913 __m128i __index, void const *__addr,
10914 int __scale)
936c0fe4 10915{
6b62f323
JJ
10916 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10917 __addr,
10918 (__v4si) __index,
10919 __mask, __scale);
936c0fe4
AI
10920}
10921
6b62f323 10922extern __inline __m128d
936c0fe4 10923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10924_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10925 __m128i __index, void const *__addr,
10926 int __scale)
936c0fe4 10927{
6b62f323
JJ
10928 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10929 __addr,
10930 (__v4si) __index,
10931 __mask, __scale);
936c0fe4
AI
10932}
10933
6b62f323 10934extern __inline __m128
936c0fe4 10935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10936_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10937 __m256i __index, void const *__addr,
10938 int __scale)
10939{
10940 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10941 __addr,
10942 (__v4di) __index,
10943 __mask, __scale);
936c0fe4
AI
10944}
10945
6b62f323 10946extern __inline __m128
936c0fe4 10947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10948_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10949 __m128i __index, void const *__addr,
10950 int __scale)
936c0fe4 10951{
6b62f323
JJ
10952 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10953 __addr,
10954 (__v2di) __index,
10955 __mask, __scale);
936c0fe4
AI
10956}
10957
6b62f323 10958extern __inline __m256d
936c0fe4 10959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10960_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10961 __m256i __index, void const *__addr,
10962 int __scale)
936c0fe4 10963{
6b62f323
JJ
10964 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10965 __addr,
10966 (__v4di) __index,
10967 __mask, __scale);
936c0fe4
AI
10968}
10969
6b62f323 10970extern __inline __m128d
936c0fe4 10971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10972_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10973 __m128i __index, void const *__addr,
10974 int __scale)
936c0fe4 10975{
6b62f323
JJ
10976 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10977 __addr,
10978 (__v2di) __index,
10979 __mask, __scale);
936c0fe4
AI
10980}
10981
6b62f323 10982extern __inline __m256i
936c0fe4 10983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10984_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10985 __m256i __index, void const *__addr,
10986 int __scale)
936c0fe4 10987{
6b62f323
JJ
10988 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10989 __addr,
10990 (__v8si) __index,
10991 __mask, __scale);
936c0fe4
AI
10992}
10993
10994extern __inline __m128i
10995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10996_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10997 __m128i __index, void const *__addr,
10998 int __scale)
936c0fe4 10999{
6b62f323
JJ
11000 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
11001 __addr,
11002 (__v4si) __index,
11003 __mask, __scale);
936c0fe4
AI
11004}
11005
11006extern __inline __m256i
11007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11008_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11009 __m128i __index, void const *__addr,
11010 int __scale)
936c0fe4 11011{
6b62f323
JJ
11012 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
11013 __addr,
11014 (__v4si) __index,
11015 __mask, __scale);
936c0fe4
AI
11016}
11017
6b62f323 11018extern __inline __m128i
936c0fe4 11019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11020_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11021 __m128i __index, void const *__addr,
11022 int __scale)
936c0fe4 11023{
6b62f323
JJ
11024 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
11025 __addr,
11026 (__v4si) __index,
11027 __mask, __scale);
936c0fe4
AI
11028}
11029
6b62f323 11030extern __inline __m128i
936c0fe4 11031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11032_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11033 __m256i __index, void const *__addr,
11034 int __scale)
936c0fe4 11035{
6b62f323
JJ
11036 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
11037 __addr,
11038 (__v4di) __index,
11039 __mask, __scale);
936c0fe4
AI
11040}
11041
11042extern __inline __m128i
11043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11044_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11045 __m128i __index, void const *__addr,
11046 int __scale)
936c0fe4 11047{
6b62f323
JJ
11048 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
11049 __addr,
11050 (__v2di) __index,
11051 __mask, __scale);
936c0fe4
AI
11052}
11053
6b62f323 11054extern __inline __m256i
936c0fe4 11055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11056_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11057 __m256i __index, void const *__addr,
11058 int __scale)
936c0fe4 11059{
6b62f323
JJ
11060 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
11061 __addr,
11062 (__v4di) __index,
11063 __mask, __scale);
936c0fe4
AI
11064}
11065
11066extern __inline __m128i
11067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11068_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11069 __m128i __index, void const *__addr,
11070 int __scale)
936c0fe4 11071{
6b62f323
JJ
11072 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
11073 __addr,
11074 (__v2di) __index,
11075 __mask, __scale);
936c0fe4
AI
11076}
11077
6b62f323 11078extern __inline void
936c0fe4 11079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11080_mm256_i32scatter_ps (void *__addr, __m256i __index,
11081 __m256 __v1, const int __scale)
936c0fe4 11082{
6b62f323
JJ
11083 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
11084 (__v8si) __index, (__v8sf) __v1,
11085 __scale);
936c0fe4
AI
11086}
11087
6b62f323 11088extern __inline void
936c0fe4 11089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11090_mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11091 __m256i __index, __m256 __v1,
11092 const int __scale)
936c0fe4 11093{
6b62f323
JJ
11094 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
11095 (__v8sf) __v1, __scale);
936c0fe4
AI
11096}
11097
6b62f323 11098extern __inline void
936c0fe4 11099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11100_mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11101 const int __scale)
936c0fe4 11102{
6b62f323
JJ
11103 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
11104 (__v4si) __index, (__v4sf) __v1,
11105 __scale);
936c0fe4
AI
11106}
11107
6b62f323 11108extern __inline void
936c0fe4 11109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11110_mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11111 __m128i __index, __m128 __v1,
11112 const int __scale)
936c0fe4 11113{
6b62f323
JJ
11114 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
11115 (__v4sf) __v1, __scale);
936c0fe4
AI
11116}
11117
6b62f323 11118extern __inline void
936c0fe4 11119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11120_mm256_i32scatter_pd (void *__addr, __m128i __index,
11121 __m256d __v1, const int __scale)
936c0fe4 11122{
6b62f323
JJ
11123 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
11124 (__v4si) __index, (__v4df) __v1,
11125 __scale);
936c0fe4
AI
11126}
11127
6b62f323 11128extern __inline void
936c0fe4 11129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11130_mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11131 __m128i __index, __m256d __v1,
11132 const int __scale)
936c0fe4 11133{
6b62f323
JJ
11134 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
11135 (__v4df) __v1, __scale);
936c0fe4
AI
11136}
11137
6b62f323 11138extern __inline void
936c0fe4 11139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11140_mm_i32scatter_pd (void *__addr, __m128i __index,
11141 __m128d __v1, const int __scale)
936c0fe4 11142{
6b62f323
JJ
11143 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
11144 (__v4si) __index, (__v2df) __v1,
11145 __scale);
936c0fe4
AI
11146}
11147
6b62f323 11148extern __inline void
936c0fe4 11149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11150_mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11151 __m128i __index, __m128d __v1,
11152 const int __scale)
936c0fe4 11153{
6b62f323
JJ
11154 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
11155 (__v2df) __v1, __scale);
936c0fe4
AI
11156}
11157
6b62f323 11158extern __inline void
936c0fe4 11159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11160_mm256_i64scatter_ps (void *__addr, __m256i __index,
11161 __m128 __v1, const int __scale)
936c0fe4 11162{
6b62f323
JJ
11163 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
11164 (__v4di) __index, (__v4sf) __v1,
11165 __scale);
936c0fe4
AI
11166}
11167
6b62f323 11168extern __inline void
936c0fe4 11169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11170_mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11171 __m256i __index, __m128 __v1,
11172 const int __scale)
936c0fe4 11173{
6b62f323
JJ
11174 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
11175 (__v4sf) __v1, __scale);
936c0fe4
AI
11176}
11177
6b62f323 11178extern __inline void
936c0fe4 11179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11180_mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11181 const int __scale)
936c0fe4 11182{
6b62f323
JJ
11183 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
11184 (__v2di) __index, (__v4sf) __v1,
11185 __scale);
936c0fe4
AI
11186}
11187
6b62f323 11188extern __inline void
936c0fe4 11189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11190_mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11191 __m128i __index, __m128 __v1,
11192 const int __scale)
936c0fe4 11193{
6b62f323
JJ
11194 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
11195 (__v4sf) __v1, __scale);
936c0fe4
AI
11196}
11197
6b62f323 11198extern __inline void
936c0fe4 11199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11200_mm256_i64scatter_pd (void *__addr, __m256i __index,
11201 __m256d __v1, const int __scale)
936c0fe4 11202{
6b62f323
JJ
11203 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
11204 (__v4di) __index, (__v4df) __v1,
11205 __scale);
936c0fe4
AI
11206}
11207
6b62f323 11208extern __inline void
936c0fe4 11209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11210_mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11211 __m256i __index, __m256d __v1,
11212 const int __scale)
936c0fe4 11213{
6b62f323
JJ
11214 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
11215 (__v4df) __v1, __scale);
936c0fe4
AI
11216}
11217
6b62f323 11218extern __inline void
936c0fe4 11219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11220_mm_i64scatter_pd (void *__addr, __m128i __index,
11221 __m128d __v1, const int __scale)
936c0fe4 11222{
6b62f323
JJ
11223 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
11224 (__v2di) __index, (__v2df) __v1,
11225 __scale);
936c0fe4
AI
11226}
11227
6b62f323 11228extern __inline void
936c0fe4 11229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11230_mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11231 __m128i __index, __m128d __v1,
11232 const int __scale)
936c0fe4 11233{
6b62f323
JJ
11234 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
11235 (__v2df) __v1, __scale);
936c0fe4
AI
11236}
11237
6b62f323 11238extern __inline void
936c0fe4 11239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11240_mm256_i32scatter_epi32 (void *__addr, __m256i __index,
11241 __m256i __v1, const int __scale)
936c0fe4 11242{
6b62f323
JJ
11243 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
11244 (__v8si) __index, (__v8si) __v1,
11245 __scale);
936c0fe4
AI
11246}
11247
6b62f323 11248extern __inline void
936c0fe4 11249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11250_mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11251 __m256i __index, __m256i __v1,
11252 const int __scale)
936c0fe4 11253{
6b62f323
JJ
11254 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
11255 (__v8si) __v1, __scale);
936c0fe4
AI
11256}
11257
6b62f323 11258extern __inline void
936c0fe4 11259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11260_mm_i32scatter_epi32 (void *__addr, __m128i __index,
11261 __m128i __v1, const int __scale)
936c0fe4 11262{
6b62f323
JJ
11263 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
11264 (__v4si) __index, (__v4si) __v1,
11265 __scale);
936c0fe4
AI
11266}
11267
6b62f323 11268extern __inline void
936c0fe4 11269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11270_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11271 __m128i __index, __m128i __v1,
11272 const int __scale)
936c0fe4 11273{
6b62f323
JJ
11274 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
11275 (__v4si) __v1, __scale);
936c0fe4
AI
11276}
11277
6b62f323 11278extern __inline void
936c0fe4 11279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11280_mm256_i32scatter_epi64 (void *__addr, __m128i __index,
11281 __m256i __v1, const int __scale)
936c0fe4 11282{
6b62f323
JJ
11283 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
11284 (__v4si) __index, (__v4di) __v1,
11285 __scale);
936c0fe4
AI
11286}
11287
6b62f323 11288extern __inline void
936c0fe4 11289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11290_mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11291 __m128i __index, __m256i __v1,
11292 const int __scale)
936c0fe4 11293{
6b62f323
JJ
11294 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
11295 (__v4di) __v1, __scale);
936c0fe4
AI
11296}
11297
6b62f323 11298extern __inline void
936c0fe4 11299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11300_mm_i32scatter_epi64 (void *__addr, __m128i __index,
11301 __m128i __v1, const int __scale)
936c0fe4 11302{
6b62f323
JJ
11303 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
11304 (__v4si) __index, (__v2di) __v1,
11305 __scale);
936c0fe4
AI
11306}
11307
6b62f323 11308extern __inline void
936c0fe4 11309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11310_mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11311 __m128i __index, __m128i __v1,
11312 const int __scale)
936c0fe4 11313{
6b62f323
JJ
11314 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
11315 (__v2di) __v1, __scale);
936c0fe4
AI
11316}
11317
6b62f323 11318extern __inline void
936c0fe4 11319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11320_mm256_i64scatter_epi32 (void *__addr, __m256i __index,
11321 __m128i __v1, const int __scale)
936c0fe4 11322{
6b62f323
JJ
11323 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
11324 (__v4di) __index, (__v4si) __v1,
11325 __scale);
936c0fe4
AI
11326}
11327
6b62f323 11328extern __inline void
936c0fe4 11329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11330_mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11331 __m256i __index, __m128i __v1,
11332 const int __scale)
936c0fe4 11333{
6b62f323
JJ
11334 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
11335 (__v4si) __v1, __scale);
936c0fe4
AI
11336}
11337
6b62f323 11338extern __inline void
936c0fe4 11339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11340_mm_i64scatter_epi32 (void *__addr, __m128i __index,
11341 __m128i __v1, const int __scale)
936c0fe4 11342{
6b62f323
JJ
11343 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
11344 (__v2di) __index, (__v4si) __v1,
11345 __scale);
936c0fe4
AI
11346}
11347
6b62f323 11348extern __inline void
936c0fe4 11349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11350_mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11351 __m128i __index, __m128i __v1,
11352 const int __scale)
936c0fe4 11353{
6b62f323
JJ
11354 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
11355 (__v4si) __v1, __scale);
936c0fe4
AI
11356}
11357
6b62f323 11358extern __inline void
936c0fe4 11359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11360_mm256_i64scatter_epi64 (void *__addr, __m256i __index,
11361 __m256i __v1, const int __scale)
936c0fe4 11362{
6b62f323
JJ
11363 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
11364 (__v4di) __index, (__v4di) __v1,
11365 __scale);
936c0fe4
AI
11366}
11367
6b62f323 11368extern __inline void
936c0fe4 11369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11370_mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11371 __m256i __index, __m256i __v1,
11372 const int __scale)
936c0fe4 11373{
6b62f323
JJ
11374 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
11375 (__v4di) __v1, __scale);
936c0fe4
AI
11376}
11377
6b62f323 11378extern __inline void
936c0fe4 11379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11380_mm_i64scatter_epi64 (void *__addr, __m128i __index,
11381 __m128i __v1, const int __scale)
936c0fe4 11382{
6b62f323
JJ
11383 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
11384 (__v2di) __index, (__v2di) __v1,
11385 __scale);
936c0fe4
AI
11386}
11387
6b62f323 11388extern __inline void
936c0fe4 11389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11390_mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11391 __m128i __index, __m128i __v1,
11392 const int __scale)
936c0fe4 11393{
6b62f323
JJ
11394 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
11395 (__v2di) __v1, __scale);
936c0fe4
AI
11396}
11397
11398extern __inline __m256i
11399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11400_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11401 _MM_PERM_ENUM __mask)
936c0fe4 11402{
6b62f323
JJ
11403 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11404 (__v8si) __W,
936c0fe4
AI
11405 (__mmask8) __U);
11406}
11407
11408extern __inline __m256i
11409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11410_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
11411 _MM_PERM_ENUM __mask)
936c0fe4 11412{
6b62f323
JJ
11413 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11414 (__v8si)
936c0fe4
AI
11415 _mm256_setzero_si256 (),
11416 (__mmask8) __U);
11417}
11418
6b62f323 11419extern __inline __m128i
936c0fe4 11420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11421_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11422 _MM_PERM_ENUM __mask)
936c0fe4 11423{
6b62f323
JJ
11424 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11425 (__v4si) __W,
936c0fe4
AI
11426 (__mmask8) __U);
11427}
11428
6b62f323 11429extern __inline __m128i
936c0fe4 11430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11431_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
11432 _MM_PERM_ENUM __mask)
936c0fe4 11433{
6b62f323
JJ
11434 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11435 (__v4si)
11436 _mm_setzero_si128 (),
936c0fe4
AI
11437 (__mmask8) __U);
11438}
11439
6b62f323 11440extern __inline __m256i
936c0fe4 11441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11442_mm256_rol_epi32 (__m256i __A, const int __B)
936c0fe4 11443{
6b62f323
JJ
11444 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11445 (__v8si)
11446 _mm256_setzero_si256 (),
11447 (__mmask8) -1);
936c0fe4
AI
11448}
11449
6b62f323 11450extern __inline __m256i
936c0fe4 11451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11452_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11453 const int __B)
936c0fe4 11454{
6b62f323
JJ
11455 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11456 (__v8si) __W,
11457 (__mmask8) __U);
936c0fe4
AI
11458}
11459
6b62f323 11460extern __inline __m256i
936c0fe4 11461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11462_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11463{
6b62f323
JJ
11464 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11465 (__v8si)
11466 _mm256_setzero_si256 (),
11467 (__mmask8) __U);
936c0fe4
AI
11468}
11469
6b62f323 11470extern __inline __m128i
936c0fe4 11471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11472_mm_rol_epi32 (__m128i __A, const int __B)
936c0fe4 11473{
6b62f323
JJ
11474 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11475 (__v4si)
11476 _mm_setzero_si128 (),
11477 (__mmask8) -1);
936c0fe4
AI
11478}
11479
6b62f323 11480extern __inline __m128i
936c0fe4 11481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11482_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11483 const int __B)
936c0fe4 11484{
6b62f323
JJ
11485 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11486 (__v4si) __W,
936c0fe4
AI
11487 (__mmask8) __U);
11488}
11489
6b62f323 11490extern __inline __m128i
936c0fe4 11491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11492_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11493{
6b62f323
JJ
11494 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11495 (__v4si)
11496 _mm_setzero_si128 (),
936c0fe4
AI
11497 (__mmask8) __U);
11498}
11499
6b62f323 11500extern __inline __m256i
936c0fe4 11501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11502_mm256_ror_epi32 (__m256i __A, const int __B)
936c0fe4 11503{
6b62f323
JJ
11504 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11505 (__v8si)
11506 _mm256_setzero_si256 (),
11507 (__mmask8) -1);
936c0fe4
AI
11508}
11509
6b62f323 11510extern __inline __m256i
936c0fe4 11511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11512_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11513 const int __B)
936c0fe4 11514{
6b62f323
JJ
11515 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11516 (__v8si) __W,
11517 (__mmask8) __U);
936c0fe4
AI
11518}
11519
11520extern __inline __m256i
11521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11522_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11523{
6b62f323
JJ
11524 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11525 (__v8si)
11526 _mm256_setzero_si256 (),
11527 (__mmask8) __U);
936c0fe4
AI
11528}
11529
6b62f323 11530extern __inline __m128i
936c0fe4 11531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11532_mm_ror_epi32 (__m128i __A, const int __B)
936c0fe4 11533{
6b62f323
JJ
11534 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11535 (__v4si)
11536 _mm_setzero_si128 (),
11537 (__mmask8) -1);
936c0fe4
AI
11538}
11539
6b62f323 11540extern __inline __m128i
936c0fe4 11541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11542_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11543 const int __B)
936c0fe4 11544{
6b62f323
JJ
11545 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11546 (__v4si) __W,
11547 (__mmask8) __U);
936c0fe4
AI
11548}
11549
6b62f323 11550extern __inline __m128i
936c0fe4 11551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11552_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11553{
6b62f323
JJ
11554 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11555 (__v4si)
11556 _mm_setzero_si128 (),
11557 (__mmask8) __U);
936c0fe4
AI
11558}
11559
6b62f323 11560extern __inline __m256i
936c0fe4 11561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11562_mm256_rol_epi64 (__m256i __A, const int __B)
936c0fe4 11563{
6b62f323
JJ
11564 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11565 (__v4di)
11566 _mm256_setzero_si256 (),
11567 (__mmask8) -1);
936c0fe4
AI
11568}
11569
6b62f323 11570extern __inline __m256i
936c0fe4 11571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11572_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11573 const int __B)
936c0fe4 11574{
6b62f323
JJ
11575 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11576 (__v4di) __W,
11577 (__mmask8) __U);
936c0fe4
AI
11578}
11579
6b62f323 11580extern __inline __m256i
936c0fe4 11581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11582_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
11583{
11584 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11585 (__v4di)
11586 _mm256_setzero_si256 (),
11587 (__mmask8) __U);
936c0fe4
AI
11588}
11589
6b62f323 11590extern __inline __m128i
936c0fe4 11591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11592_mm_rol_epi64 (__m128i __A, const int __B)
936c0fe4 11593{
6b62f323
JJ
11594 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11595 (__v2di)
11596 _mm_setzero_si128 (),
936c0fe4
AI
11597 (__mmask8) -1);
11598}
11599
6b62f323 11600extern __inline __m128i
936c0fe4 11601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11602_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11603 const int __B)
936c0fe4 11604{
6b62f323
JJ
11605 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11606 (__v2di) __W,
11607 (__mmask8) __U);
936c0fe4
AI
11608}
11609
6b62f323 11610extern __inline __m128i
936c0fe4 11611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11612_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11613{
6b62f323
JJ
11614 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11615 (__v2di)
11616 _mm_setzero_si128 (),
11617 (__mmask8) __U);
936c0fe4
AI
11618}
11619
6b62f323 11620extern __inline __m256i
936c0fe4 11621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11622_mm256_ror_epi64 (__m256i __A, const int __B)
936c0fe4 11623{
6b62f323
JJ
11624 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11625 (__v4di)
11626 _mm256_setzero_si256 (),
11627 (__mmask8) -1);
936c0fe4
AI
11628}
11629
6b62f323 11630extern __inline __m256i
936c0fe4 11631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11632_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11633 const int __B)
936c0fe4 11634{
6b62f323
JJ
11635 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11636 (__v4di) __W,
11637 (__mmask8) __U);
936c0fe4
AI
11638}
11639
6b62f323 11640extern __inline __m256i
936c0fe4 11641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11642_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11643{
6b62f323
JJ
11644 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11645 (__v4di)
11646 _mm256_setzero_si256 (),
936c0fe4
AI
11647 (__mmask8) __U);
11648}
11649
6b62f323 11650extern __inline __m128i
936c0fe4 11651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11652_mm_ror_epi64 (__m128i __A, const int __B)
936c0fe4 11653{
6b62f323
JJ
11654 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11655 (__v2di)
11656 _mm_setzero_si128 (),
11657 (__mmask8) -1);
936c0fe4
AI
11658}
11659
6b62f323 11660extern __inline __m128i
936c0fe4 11661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11662_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11663 const int __B)
936c0fe4 11664{
6b62f323
JJ
11665 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11666 (__v2di) __W,
11667 (__mmask8) __U);
936c0fe4
AI
11668}
11669
6b62f323 11670extern __inline __m128i
936c0fe4 11671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11672_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11673{
6b62f323
JJ
11674 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11675 (__v2di)
11676 _mm_setzero_si128 (),
11677 (__mmask8) __U);
936c0fe4
AI
11678}
11679
6b62f323 11680extern __inline __m128i
936c0fe4 11681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11682_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
936c0fe4 11683{
6b62f323
JJ
11684 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11685 (__v4si) __B, __imm,
11686 (__v4si)
11687 _mm_setzero_si128 (),
11688 (__mmask8) -1);
936c0fe4
AI
11689}
11690
6b62f323 11691extern __inline __m128i
936c0fe4 11692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11693_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11694 __m128i __B, const int __imm)
936c0fe4 11695{
6b62f323
JJ
11696 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11697 (__v4si) __B, __imm,
11698 (__v4si) __W,
936c0fe4
AI
11699 (__mmask8) __U);
11700}
11701
6b62f323 11702extern __inline __m128i
936c0fe4 11703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11704_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11705 const int __imm)
936c0fe4 11706{
6b62f323
JJ
11707 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11708 (__v4si) __B, __imm,
11709 (__v4si)
11710 _mm_setzero_si128 (),
11711 (__mmask8) __U);
936c0fe4
AI
11712}
11713
6b62f323 11714extern __inline __m128i
936c0fe4 11715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11716_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
936c0fe4 11717{
6b62f323
JJ
11718 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11719 (__v2di) __B, __imm,
11720 (__v2di)
11721 _mm_setzero_si128 (),
11722 (__mmask8) -1);
936c0fe4
AI
11723}
11724
6b62f323 11725extern __inline __m128i
936c0fe4 11726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11727_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11728 __m128i __B, const int __imm)
936c0fe4 11729{
6b62f323
JJ
11730 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11731 (__v2di) __B, __imm,
11732 (__v2di) __W,
11733 (__mmask8) __U);
936c0fe4
AI
11734}
11735
6b62f323 11736extern __inline __m128i
936c0fe4 11737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11738_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11739 const int __imm)
936c0fe4 11740{
6b62f323
JJ
11741 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11742 (__v2di) __B, __imm,
11743 (__v2di)
11744 _mm_setzero_si128 (),
11745 (__mmask8) __U);
936c0fe4
AI
11746}
11747
6b62f323 11748extern __inline __m256i
936c0fe4 11749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11750_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 11751{
6b62f323
JJ
11752 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11753 (__v8si) __B, __imm,
11754 (__v8si)
11755 _mm256_setzero_si256 (),
936c0fe4
AI
11756 (__mmask8) -1);
11757}
11758
6b62f323 11759extern __inline __m256i
936c0fe4 11760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11761_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11762 __m256i __B, const int __imm)
936c0fe4 11763{
6b62f323
JJ
11764 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11765 (__v8si) __B, __imm,
11766 (__v8si) __W,
11767 (__mmask8) __U);
936c0fe4
AI
11768}
11769
6b62f323 11770extern __inline __m256i
936c0fe4 11771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11772_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11773 const int __imm)
936c0fe4 11774{
6b62f323
JJ
11775 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11776 (__v8si) __B, __imm,
11777 (__v8si)
11778 _mm256_setzero_si256 (),
11779 (__mmask8) __U);
936c0fe4
AI
11780}
11781
6b62f323 11782extern __inline __m256i
936c0fe4 11783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11784_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 11785{
6b62f323
JJ
11786 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11787 (__v4di) __B, __imm,
11788 (__v4di)
11789 _mm256_setzero_si256 (),
11790 (__mmask8) -1);
936c0fe4
AI
11791}
11792
6b62f323 11793extern __inline __m256i
936c0fe4 11794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11795_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11796 __m256i __B, const int __imm)
936c0fe4 11797{
6b62f323
JJ
11798 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11799 (__v4di) __B, __imm,
11800 (__v4di) __W,
936c0fe4
AI
11801 (__mmask8) __U);
11802}
11803
6b62f323 11804extern __inline __m256i
936c0fe4 11805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11806_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11807 const int __imm)
936c0fe4 11808{
6b62f323
JJ
11809 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11810 (__v4di) __B, __imm,
11811 (__v4di)
11812 _mm256_setzero_si256 (),
936c0fe4
AI
11813 (__mmask8) __U);
11814}
11815
6b62f323 11816extern __inline __m128i
936c0fe4 11817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11818_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11819 const int __I)
936c0fe4 11820{
6b62f323
JJ
11821 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11822 (__v8hi) __W,
936c0fe4
AI
11823 (__mmask8) __U);
11824}
11825
6b62f323 11826extern __inline __m128i
936c0fe4 11827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11828_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
936c0fe4 11829{
6b62f323
JJ
11830 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11831 (__v8hi)
11832 _mm_setzero_si128 (),
936c0fe4
AI
11833 (__mmask8) __U);
11834}
11835
6b62f323 11836extern __inline __m128i
936c0fe4 11837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11838_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11839 const int __I)
936c0fe4 11840{
6b62f323
JJ
11841 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11842 (__v8hi) __W,
11843 (__mmask8) __U);
936c0fe4
AI
11844}
11845
6b62f323
JJ
11846extern __inline __m128i
11847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11848_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
eee5d6f5 11849{
6b62f323
JJ
11850 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11851 (__v8hi)
11852 _mm_setzero_si128 (),
11853 (__mmask8) __U);
eee5d6f5
AI
11854}
11855
6b62f323
JJ
11856extern __inline __m256i
11857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11858_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11859 const int __imm)
936c0fe4 11860{
6b62f323
JJ
11861 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11862 (__v8si) __W,
11863 (__mmask8) __U);
936c0fe4
AI
11864}
11865
6b62f323
JJ
11866extern __inline __m256i
11867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
eee5d6f5 11869{
6b62f323
JJ
11870 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11871 (__v8si)
11872 _mm256_setzero_si256 (),
11873 (__mmask8) __U);
eee5d6f5
AI
11874}
11875
6b62f323
JJ
11876extern __inline __m128i
11877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11879 const int __imm)
936c0fe4 11880{
6b62f323
JJ
11881 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11882 (__v4si) __W,
11883 (__mmask8) __U);
936c0fe4
AI
11884}
11885
6b62f323
JJ
11886extern __inline __m128i
11887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11888_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
eee5d6f5 11889{
6b62f323
JJ
11890 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11891 (__v4si)
11892 _mm_setzero_si128 (),
11893 (__mmask8) __U);
eee5d6f5
AI
11894}
11895
6b62f323
JJ
11896extern __inline __m256i
11897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11898_mm256_srai_epi64 (__m256i __A, const int __imm)
936c0fe4 11899{
6b62f323
JJ
11900 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11901 (__v4di)
11902 _mm256_setzero_si256 (),
c42b0bdf 11903 (__mmask8) -1);
936c0fe4
AI
11904}
11905
6b62f323
JJ
11906extern __inline __m256i
11907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11908_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11909 const int __imm)
936c0fe4 11910{
6b62f323
JJ
11911 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11912 (__v4di) __W,
11913 (__mmask8) __U);
936c0fe4
AI
11914}
11915
6b62f323
JJ
11916extern __inline __m256i
11917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11918_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
eee5d6f5 11919{
6b62f323
JJ
11920 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11921 (__v4di)
11922 _mm256_setzero_si256 (),
11923 (__mmask8) __U);
eee5d6f5
AI
11924}
11925
6b62f323
JJ
11926extern __inline __m128i
11927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11928_mm_srai_epi64 (__m128i __A, const int __imm)
936c0fe4 11929{
6b62f323
JJ
11930 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11931 (__v2di)
11932 _mm_setzero_si128 (),
c42b0bdf 11933 (__mmask8) -1);
936c0fe4
AI
11934}
11935
6b62f323
JJ
11936extern __inline __m128i
11937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11938_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11939 const int __imm)
936c0fe4 11940{
6b62f323
JJ
11941 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11942 (__v2di) __W,
11943 (__mmask8) __U);
936c0fe4
AI
11944}
11945
6b62f323
JJ
11946extern __inline __m128i
11947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11948_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
eee5d6f5 11949{
6b62f323
JJ
11950 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11951 (__v2di)
11952 _mm_setzero_si128 (),
11953 (__mmask8) __U);
eee5d6f5
AI
11954}
11955
6b62f323
JJ
11956extern __inline __m128i
11957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
936c0fe4 11959{
6b62f323
JJ
11960 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11961 (__v4si) __W,
11962 (__mmask8) __U);
936c0fe4
AI
11963}
11964
6b62f323
JJ
11965extern __inline __m128i
11966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
eee5d6f5 11968{
6b62f323
JJ
11969 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11970 (__v4si)
11971 _mm_setzero_si128 (),
11972 (__mmask8) __U);
eee5d6f5
AI
11973}
11974
6b62f323
JJ
11975extern __inline __m128i
11976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11977_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
936c0fe4 11978{
6b62f323
JJ
11979 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11980 (__v2di) __W,
11981 (__mmask8) __U);
936c0fe4
AI
11982}
11983
6b62f323
JJ
11984extern __inline __m128i
11985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11986_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
eee5d6f5 11987{
6b62f323
JJ
11988 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11989 (__v2di)
11990 _mm_setzero_si128 (),
11991 (__mmask8) __U);
eee5d6f5
AI
11992}
11993
6b62f323
JJ
11994extern __inline __m256i
11995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11996_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11997 int __B)
936c0fe4 11998{
6b62f323
JJ
11999 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12000 (__v8si) __W,
12001 (__mmask8) __U);
936c0fe4
AI
12002}
12003
6b62f323
JJ
12004extern __inline __m256i
12005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12006_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
12007{
12008 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12009 (__v8si)
12010 _mm256_setzero_si256 (),
12011 (__mmask8) __U);
eee5d6f5
AI
12012}
12013
6b62f323
JJ
12014extern __inline __m256i
12015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12016_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
12017 int __B)
936c0fe4 12018{
6b62f323
JJ
12019 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12020 (__v4di) __W,
12021 (__mmask8) __U);
936c0fe4
AI
12022}
12023
6b62f323
JJ
12024extern __inline __m256i
12025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12026_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
eee5d6f5 12027{
6b62f323
JJ
12028 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12029 (__v4di)
12030 _mm256_setzero_si256 (),
12031 (__mmask8) __U);
eee5d6f5
AI
12032}
12033
6b62f323
JJ
12034extern __inline __m256d
12035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12036_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
12037 const int __imm)
936c0fe4 12038{
6b62f323
JJ
12039 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12040 (__v4df) __W,
12041 (__mmask8) __U);
936c0fe4
AI
12042}
12043
6b62f323
JJ
12044extern __inline __m256d
12045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
eee5d6f5 12047{
6b62f323
JJ
12048 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12049 (__v4df)
12050 _mm256_setzero_pd (),
12051 (__mmask8) __U);
eee5d6f5
AI
12052}
12053
6b62f323
JJ
12054extern __inline __m256d
12055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12056_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
12057 const int __C)
936c0fe4 12058{
6b62f323
JJ
12059 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12060 (__v4df) __W,
12061 (__mmask8) __U);
936c0fe4
AI
12062}
12063
6b62f323
JJ
12064extern __inline __m256d
12065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12066_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
eee5d6f5 12067{
6b62f323
JJ
12068 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12069 (__v4df)
12070 _mm256_setzero_pd (),
12071 (__mmask8) __U);
eee5d6f5
AI
12072}
12073
6b62f323
JJ
12074extern __inline __m128d
12075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12076_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
12077 const int __C)
936c0fe4 12078{
6b62f323
JJ
12079 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12080 (__v2df) __W,
12081 (__mmask8) __U);
936c0fe4
AI
12082}
12083
6b62f323
JJ
12084extern __inline __m128d
12085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12086_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
eee5d6f5 12087{
6b62f323
JJ
12088 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12089 (__v2df)
12090 _mm_setzero_pd (),
12091 (__mmask8) __U);
eee5d6f5
AI
12092}
12093
6b62f323
JJ
12094extern __inline __m256
12095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12096_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
12097 const int __C)
936c0fe4 12098{
6b62f323
JJ
12099 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12100 (__v8sf) __W,
12101 (__mmask8) __U);
936c0fe4
AI
12102}
12103
6b62f323
JJ
12104extern __inline __m256
12105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12106_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
eee5d6f5 12107{
6b62f323
JJ
12108 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12109 (__v8sf)
12110 _mm256_setzero_ps (),
12111 (__mmask8) __U);
eee5d6f5
AI
12112}
12113
6b62f323
JJ
12114extern __inline __m128
12115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12116_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
12117 const int __C)
936c0fe4 12118{
6b62f323
JJ
12119 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12120 (__v4sf) __W,
12121 (__mmask8) __U);
936c0fe4
AI
12122}
12123
6b62f323
JJ
12124extern __inline __m128
12125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12126_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
eee5d6f5 12127{
6b62f323
JJ
12128 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12129 (__v4sf)
12130 _mm_setzero_ps (),
12131 (__mmask8) __U);
eee5d6f5
AI
12132}
12133
6b62f323
JJ
12134extern __inline __m256d
12135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12136_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
936c0fe4 12137{
6b62f323
JJ
12138 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
12139 (__v4df) __W,
12140 (__mmask8) __U);
936c0fe4
AI
12141}
12142
6b62f323
JJ
12143extern __inline __m256
12144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
eee5d6f5 12146{
6b62f323
JJ
12147 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
12148 (__v8sf) __W,
12149 (__mmask8) __U);
eee5d6f5
AI
12150}
12151
6b62f323
JJ
12152extern __inline __m256i
12153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
936c0fe4 12155{
6b62f323
JJ
12156 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
12157 (__v4di) __W,
12158 (__mmask8) __U);
936c0fe4
AI
12159}
12160
6b62f323
JJ
12161extern __inline __m256i
12162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12163_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
eee5d6f5 12164{
6b62f323
JJ
12165 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
12166 (__v8si) __W,
12167 (__mmask8) __U);
eee5d6f5
AI
12168}
12169
6b62f323
JJ
12170extern __inline __m128d
12171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12172_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
936c0fe4 12173{
6b62f323
JJ
12174 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
12175 (__v2df) __W,
12176 (__mmask8) __U);
936c0fe4
AI
12177}
12178
6b62f323
JJ
12179extern __inline __m128
12180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12181_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
eee5d6f5 12182{
6b62f323
JJ
12183 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
12184 (__v4sf) __W,
12185 (__mmask8) __U);
eee5d6f5
AI
12186}
12187
6b62f323
JJ
12188extern __inline __m128i
12189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12190_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
936c0fe4 12191{
6b62f323
JJ
12192 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
12193 (__v2di) __W,
12194 (__mmask8) __U);
936c0fe4
AI
12195}
12196
6b62f323
JJ
12197extern __inline __m128i
12198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12199_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
eee5d6f5 12200{
6b62f323
JJ
12201 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
12202 (__v4si) __W,
12203 (__mmask8) __U);
eee5d6f5
AI
12204}
12205
936c0fe4 12206extern __inline __mmask8
6b62f323
JJ
12207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12208_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
936c0fe4 12209{
6b62f323
JJ
12210 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12211 (__v4di) __Y, __P,
12212 (__mmask8) -1);
936c0fe4
AI
12213}
12214
eee5d6f5 12215extern __inline __mmask8
6b62f323
JJ
12216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12217_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
eee5d6f5 12218{
6b62f323
JJ
12219 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12220 (__v8si) __Y, __P,
12221 (__mmask8) -1);
eee5d6f5
AI
12222}
12223
936c0fe4 12224extern __inline __mmask8
6b62f323
JJ
12225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12226_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
936c0fe4 12227{
6b62f323
JJ
12228 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12229 (__v4di) __Y, __P,
c42b0bdf 12230 (__mmask8) -1);
936c0fe4
AI
12231}
12232
eee5d6f5 12233extern __inline __mmask8
6b62f323
JJ
12234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12235_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
eee5d6f5 12236{
6b62f323
JJ
12237 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12238 (__v8si) __Y, __P,
12239 (__mmask8) -1);
eee5d6f5
AI
12240}
12241
936c0fe4 12242extern __inline __mmask8
6b62f323
JJ
12243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12244_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
936c0fe4 12245{
6b62f323
JJ
12246 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12247 (__v4df) __Y, __P,
c42b0bdf 12248 (__mmask8) -1);
936c0fe4
AI
12249}
12250
eee5d6f5 12251extern __inline __mmask8
6b62f323
JJ
12252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12253_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
eee5d6f5 12254{
6b62f323
JJ
12255 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12256 (__v8sf) __Y, __P,
12257 (__mmask8) -1);
eee5d6f5
AI
12258}
12259
936c0fe4 12260extern __inline __mmask8
6b62f323
JJ
12261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12262_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12263 const int __P)
936c0fe4 12264{
6b62f323
JJ
12265 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12266 (__v4di) __Y, __P,
12267 (__mmask8) __U);
936c0fe4
AI
12268}
12269
eee5d6f5 12270extern __inline __mmask8
6b62f323
JJ
12271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12272_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12273 const int __P)
eee5d6f5 12274{
6b62f323
JJ
12275 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12276 (__v8si) __Y, __P,
12277 (__mmask8) __U);
eee5d6f5
AI
12278}
12279
936c0fe4 12280extern __inline __mmask8
6b62f323
JJ
12281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12283 const int __P)
936c0fe4 12284{
6b62f323
JJ
12285 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12286 (__v4di) __Y, __P,
12287 (__mmask8) __U);
936c0fe4
AI
12288}
12289
eee5d6f5 12290extern __inline __mmask8
6b62f323
JJ
12291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12292_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12293 const int __P)
eee5d6f5 12294{
6b62f323
JJ
12295 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12296 (__v8si) __Y, __P,
12297 (__mmask8) __U);
eee5d6f5
AI
12298}
12299
936c0fe4 12300extern __inline __mmask8
6b62f323
JJ
12301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
12303 const int __P)
936c0fe4 12304{
6b62f323
JJ
12305 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12306 (__v4df) __Y, __P,
12307 (__mmask8) __U);
936c0fe4
AI
12308}
12309
eee5d6f5 12310extern __inline __mmask8
6b62f323
JJ
12311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12312_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
12313 const int __P)
eee5d6f5 12314{
6b62f323
JJ
12315 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12316 (__v8sf) __Y, __P,
12317 (__mmask8) __U);
eee5d6f5
AI
12318}
12319
936c0fe4 12320extern __inline __mmask8
6b62f323
JJ
12321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12322_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
936c0fe4 12323{
6b62f323
JJ
12324 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12325 (__v2di) __Y, __P,
c42b0bdf 12326 (__mmask8) -1);
936c0fe4
AI
12327}
12328
eee5d6f5 12329extern __inline __mmask8
6b62f323
JJ
12330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12331_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
eee5d6f5
AI
12332{
12333 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
6b62f323
JJ
12334 (__v4si) __Y, __P,
12335 (__mmask8) -1);
eee5d6f5
AI
12336}
12337
936c0fe4 12338extern __inline __mmask8
6b62f323
JJ
12339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12340_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
936c0fe4 12341{
6b62f323
JJ
12342 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12343 (__v2di) __Y, __P,
12344 (__mmask8) -1);
936c0fe4
AI
12345}
12346
eee5d6f5 12347extern __inline __mmask8
6b62f323
JJ
12348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12349_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
eee5d6f5 12350{
6b62f323
JJ
12351 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12352 (__v4si) __Y, __P,
12353 (__mmask8) -1);
eee5d6f5
AI
12354}
12355
936c0fe4 12356extern __inline __mmask8
6b62f323
JJ
12357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12358_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
936c0fe4 12359{
6b62f323
JJ
12360 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12361 (__v2df) __Y, __P,
12362 (__mmask8) -1);
936c0fe4
AI
12363}
12364
eee5d6f5 12365extern __inline __mmask8
6b62f323
JJ
12366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12367_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
eee5d6f5 12368{
6b62f323
JJ
12369 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12370 (__v4sf) __Y, __P,
12371 (__mmask8) -1);
eee5d6f5
AI
12372}
12373
936c0fe4 12374extern __inline __mmask8
6b62f323
JJ
12375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12376_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12377 const int __P)
936c0fe4
AI
12378{
12379 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
6b62f323
JJ
12380 (__v2di) __Y, __P,
12381 (__mmask8) __U);
936c0fe4
AI
12382}
12383
eee5d6f5 12384extern __inline __mmask8
6b62f323
JJ
12385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12386_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12387 const int __P)
eee5d6f5 12388{
6b62f323
JJ
12389 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12390 (__v4si) __Y, __P,
12391 (__mmask8) __U);
eee5d6f5
AI
12392}
12393
936c0fe4 12394extern __inline __mmask8
6b62f323
JJ
12395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12396_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12397 const int __P)
936c0fe4 12398{
6b62f323
JJ
12399 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12400 (__v2di) __Y, __P,
12401 (__mmask8) __U);
936c0fe4
AI
12402}
12403
eee5d6f5 12404extern __inline __mmask8
6b62f323
JJ
12405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12406_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12407 const int __P)
eee5d6f5 12408{
6b62f323
JJ
12409 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12410 (__v4si) __Y, __P,
12411 (__mmask8) __U);
eee5d6f5
AI
12412}
12413
936c0fe4 12414extern __inline __mmask8
6b62f323
JJ
12415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12416_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
12417 const int __P)
936c0fe4 12418{
6b62f323
JJ
12419 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12420 (__v2df) __Y, __P,
12421 (__mmask8) __U);
936c0fe4
AI
12422}
12423
eee5d6f5 12424extern __inline __mmask8
6b62f323
JJ
12425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12426_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
12427 const int __P)
eee5d6f5 12428{
6b62f323
JJ
12429 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12430 (__v4sf) __Y, __P,
12431 (__mmask8) __U);
eee5d6f5
AI
12432}
12433
6b62f323
JJ
12434extern __inline __m256d
12435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12436_mm256_permutex_pd (__m256d __X, const int __M)
936c0fe4 12437{
6b62f323
JJ
12438 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
12439 (__v4df)
12440 _mm256_undefined_pd (),
12441 (__mmask8) -1);
936c0fe4
AI
12442}
12443
12444#else
/* Macro form of _mm256_permutex_pd (on the #else branch): expands to
   __builtin_ia32_permdf256_mask on X with immediate M, the result of
   _mm256_undefined_pd () and an all-ones mask.  */
#define _mm256_permutex_pd(X, M) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df)(__m256d)(X), \
     (int)(M), \
     (__v4df)(__m256d) _mm256_undefined_pd (), \
     (__mmask8)-1))
12450
/* Expands to __builtin_ia32_permdi256_mask on X with immediate I, a
   zeroed vector and an all-ones mask.  */
#define _mm256_permutex_epi64(X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
     (__mmask8) -1))
12457
/* Expands to __builtin_ia32_permdi256_mask on X with immediate I, a
   zeroed vector and M as the mask operand.  */
#define _mm256_maskz_permutex_epi64(M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
     (__mmask8)(M)))
12464
/* Expands to __builtin_ia32_permdi256_mask on X with immediate I, W as
   the vector operand and M as the mask operand.  */
#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(M)))
12470
/* Expands to __builtin_ia32_insertf32x4_256_mask on X and Y with
   immediate C, a zeroed vector and an all-ones mask.  */
#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))
12476
/* Expands to __builtin_ia32_insertf32x4_256_mask on X and Y with
   immediate C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))
12482
/* Expands to __builtin_ia32_insertf32x4_256_mask on X and Y with
   immediate C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))
12488
/* Expands to __builtin_ia32_inserti32x4_256_mask on X and Y with
   immediate C, a zeroed vector and an all-ones mask.  */
#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))
12494
/* Expands to __builtin_ia32_inserti32x4_256_mask on X and Y with
   immediate C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))
12500
/* Expands to __builtin_ia32_inserti32x4_256_mask on X and Y with
   immediate C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_inserti32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
12506
/* Expands to __builtin_ia32_extractf32x4_256_mask on X with immediate
   C, a zeroed vector and an all-ones mask.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)-1))
12512
/* Expands to __builtin_ia32_extractf32x4_256_mask on X with immediate
   C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))
12518
/* Expands to __builtin_ia32_extractf32x4_256_mask on X with immediate
   C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)(U)))
12524
/* Expands to __builtin_ia32_extracti32x4_256_mask on X with immediate
   C, a zeroed vector and an all-ones mask.  */
#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))
12528
/* Expands to __builtin_ia32_extracti32x4_256_mask on X with immediate
   C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))
12532
/* Expands to __builtin_ia32_extracti32x4_256_mask on X with immediate
   C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
12536
/* Expands to __builtin_ia32_shuf_i64x2_256_mask on X and Y with
   immediate C, a zeroed vector and an all-ones mask.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))
12542
/* Expands to __builtin_ia32_shuf_i64x2_256_mask on X and Y with
   immediate C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))
12548
/* Expands to __builtin_ia32_shuf_i64x2_256_mask on X and Y with
   immediate C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
12554
/* Expands to __builtin_ia32_shuf_i32x4_256_mask on X and Y with
   immediate C, a zeroed vector and an all-ones mask.  */
#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8)-1))
12561
/* Expands to __builtin_ia32_shuf_i32x4_256_mask on X and Y with
   immediate C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))
12567
/* Expands to __builtin_ia32_shuf_i32x4_256_mask on X and Y with
   immediate C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8)(U)))
12574
/* Expands to __builtin_ia32_shuf_f64x2_256_mask on X and Y with
   immediate C, a zeroed vector and an all-ones mask.  */
#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)-1))
12580
/* Expands to __builtin_ia32_shuf_f64x2_256_mask on X and Y with
   immediate C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))
12586
/* Expands to __builtin_ia32_shuf_f64x2_256_mask on X and Y with
   immediate C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)(U)))
12592
/* Expands to __builtin_ia32_shuf_f32x4_256_mask on X and Y with
   immediate C, a zeroed vector and an all-ones mask.  */
#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))
12598
/* Expands to __builtin_ia32_shuf_f32x4_256_mask on X and Y with
   immediate C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))
12604
/* Expands to __builtin_ia32_shuf_f32x4_256_mask on X and Y with
   immediate C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))
12610
/* Expands to __builtin_ia32_shufpd256_mask on A and B with immediate
   C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))
12616
/* Expands to __builtin_ia32_shufpd256_mask on A and B with immediate
   C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d) _mm256_setzero_pd (), \
     (__mmask8)(U)))
12623
/* Expands to __builtin_ia32_shufpd128_mask on A and B with immediate
   C, W as the vector operand and U as the mask operand.  */
#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))
12629
/* Expands to __builtin_ia32_shufpd128_mask on A and B with immediate
   C, a zeroed vector and U as the mask operand.  */
#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)_mm_setzero_pd (), \
     (__mmask8)(U)))
12635
/* Expands to __builtin_ia32_shufps256_mask on A and B with immediate
   C, W as the vector operand and U as the mask operand.  */
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), \
     (__v8sf)(__m256)(B), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))
12641
/* Expands to __builtin_ia32_shufps256_mask on A and B with immediate
   C, a zeroed vector and U as the mask operand.  */
#define _mm256_maskz_shuffle_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), \
     (__v8sf)(__m256)(B), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))
12647
/* Expands to __builtin_ia32_shufps128_mask on A and B with immediate
   C, W as the vector operand and U as the mask operand.  */
#define _mm_mask_shuffle_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))
12653
/* Expands to __builtin_ia32_shufps128_mask on A and B with immediate
   C, a zeroed vector and U as the mask operand.  */
#define _mm_maskz_shuffle_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
     (__v4sf)(__m128)(A), \
     (__v4sf)(__m128)(B), \
     (int)(C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)(U)))
12659
/* Expands to __builtin_ia32_fixupimmpd256_mask on X, Y and Z with
   immediate C and an all-ones mask.  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))
12665
/* Expands to __builtin_ia32_fixupimmpd256_mask on X, Y and Z with
   immediate C and U as the mask operand.  */
#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12671
/* Expands to __builtin_ia32_fixupimmpd256_maskz on X, Y and Z with
   immediate C and U as the mask operand.  */
#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12677
/* Expands to __builtin_ia32_fixupimmps256_mask on X, Y and Z with
   immediate C and an all-ones mask.  */
#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(-1)))
12683
12684
/* Expands to __builtin_ia32_fixupimmps256_mask on X, Y and Z with
   immediate C and U as the mask operand.  */
#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), \
     (int)(C), \
     (__mmask8)(U)))
12690
12691#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
12692 ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \
12693 (__v8sf)(__m256)(Y), \
12694 (__v8si)(__m256i)(Z), (int)(C),\
12695 (__mmask8)(U)))
12696
/* 128-bit fixupimm macros.  The unmasked and "mask" forms call the
   *_mask builtin (with an all-ones mask and with U respectively); the
   "maskz" forms call the *_maskz builtin with U.  X, Y and Z are
   forwarded as the three vector operands and C as the int immediate.  */
#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(-1)))

#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_maskz ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__v2di)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(-1)))

#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U)))

#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_maskz ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__v4si)(__m128i)(Z), \
      (int)(C), \
      (__mmask8)(U)))
12733
/* Masked srli (shift by immediate B) macros for 32- and 64-bit
   elements.  Each forwards A and B to the matching
   __builtin_ia32_psrl{d,q}i*_mask builtin with a third vector operand
   and the mask U: the "mask" forms pass W, the "maskz" forms pass the
   result of the corresponding setzero intrinsic.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)_mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)_mm_setzero_si128 (), \
      (__mmask8)(U)))
936c0fe4
AI
12765
/* Masked slli (shift by immediate C) macros for 32- and 64-bit
   elements.  Each forwards X and C to the matching
   __builtin_ia32_psll{d,q}i*_mask builtin with a third vector operand
   and the mask U: the "mask" forms pass W, the "maskz" forms pass the
   result of the corresponding setzero intrinsic.  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
      (__v4di)(__m256i)(X), \
      (int)(C), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
      (__v4di)(__m256i)(X), \
      (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))

#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
      (__v2di)(__m128i)(X), \
      (int)(C), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
      (__v2di)(__m128i)(X), \
      (int)(C), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
12805
/* Ternary-logic macros.  A, B and C are the three vector operands and
   I is the int immediate.  The unmasked forms call the *_mask builtin
   with an all-ones mask, the "mask" forms call it with U, and the
   "maskz" forms call the *_maskz builtin with U.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (int)(I), \
      (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (int)(I), \
      (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (int)(I), \
      (__mmask8)-1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (int)(I), \
      (__mmask8)-1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (int)(I), \
      (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (int)(I), \
      (__mmask8)(U)))
12853
/* Roundscale macros.  Each forwards A and the int immediate B to the
   matching __builtin_ia32_rndscalep{s,d}_*_mask builtin with a third
   vector operand and a mask.  The unmasked forms pass a setzero vector
   and an all-ones mask; the "mask" forms pass W and U; the "maskz"
   forms pass a setzero vector and U.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)(U)))

#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))

#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)(U)))

#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)(U)))
936c0fe4
AI
12901
/* Getmant macros.  B and C are combined into the single int immediate
   ((C) << 2) | (B) that the __builtin_ia32_getmantp{s,d}*_mask builtin
   takes after the source X.  The unmasked forms pass a setzero vector
   and an all-ones mask; the "mask" forms pass W and U; the "maskz"
   forms pass a setzero vector and U.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
      (__v8sf)(__m256) (X), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)-1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
      (__v8sf)(__m256) (X), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
      (__v8sf)(__m256) (X), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)(U)))

#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
      (__v4sf)(__m128) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)-1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
      (__v4sf)(__m128) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
      (__v4sf)(__m128) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)(U)))

#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
      (__v4df)(__m256d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)-1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
      (__v4df)(__m256d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
      (__v4df)(__m256d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))

#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
      (__v2df)(__m128d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)-1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
      (__v2df)(__m128d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
      (__v2df)(__m128d) (X), \
      (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)(U)))
12973
/* Masked gather macros for float/double elements.  Each forwards, in
   this order, V1OLD, ADDR (as void const *), INDEX, MASK and the int
   SCALE to the matching __builtin_ia32_gather3{si,di}v* builtin.

   Every macro argument is parenthesized before a cast is applied to it
   so that an expression argument such as "base + 1" is cast as a whole
   (without the parentheses the cast would bind to "base" only), and the
   whole expansion is parenthesized so it can be used safely inside a
   larger expression.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ( \
      (__v8sf)(__m256)(V1OLD), \
      (void const *)(ADDR), \
      (__v8si)(__m256i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ( \
      (__v4sf)(__m128)(V1OLD), \
      (void const *)(ADDR), \
      (__v4si)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ( \
      (__v4df)(__m256d)(V1OLD), \
      (void const *)(ADDR), \
      (__v4si)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ( \
      (__v2df)(__m128d)(V1OLD), \
      (void const *)(ADDR), \
      (__v4si)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ( \
      (__v4sf)(__m128)(V1OLD), \
      (void const *)(ADDR), \
      (__v4di)(__m256i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ( \
      (__v4sf)(__m128)(V1OLD), \
      (void const *)(ADDR), \
      (__v2di)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ( \
      (__v4df)(__m256d)(V1OLD), \
      (void const *)(ADDR), \
      (__v4di)(__m256i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ( \
      (__v2df)(__m128d)(V1OLD), \
      (void const *)(ADDR), \
      (__v2di)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))
13021
/* Masked gather macros for 32- and 64-bit integer elements.  Each
   forwards, in this order, V1OLD, ADDR (as void const *), INDEX, MASK
   and the int SCALE to the matching __builtin_ia32_gather3{si,di}v*
   builtin.

   Every macro argument is parenthesized before a cast is applied to it
   so that an expression argument such as "base + 1" is cast as a whole
   (without the parentheses the cast would bind to "base" only), and the
   whole expansion is parenthesized so it can be used safely inside a
   larger expression.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ( \
      (__v8si)(__m256i)(V1OLD), \
      (void const *)(ADDR), \
      (__v8si)(__m256i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ( \
      (__v4si)(__m128i)(V1OLD), \
      (void const *)(ADDR), \
      (__v4si)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ( \
      (__v4di)(__m256i)(V1OLD), \
      (void const *)(ADDR), \
      (__v4si)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ( \
      (__v2di)(__m128i)(V1OLD), \
      (void const *)(ADDR), \
      (__v4si)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ( \
      (__v4si)(__m128i)(V1OLD), \
      (void const *)(ADDR), \
      (__v4di)(__m256i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ( \
      (__v4si)(__m128i)(V1OLD), \
      (void const *)(ADDR), \
      (__v2di)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ( \
      (__v4di)(__m256i)(V1OLD), \
      (void const *)(ADDR), \
      (__v4di)(__m256i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ( \
      (__v2di)(__m128i)(V1OLD), \
      (void const *)(ADDR), \
      (__v2di)(__m128i)(INDEX), \
      (__mmask8)(MASK), \
      (int)(SCALE)))
13069
/* Scatter macros for float/double elements with 32-bit indices.  Each
   forwards ADDR (as void *), a mask, INDEX, V1 and the int SCALE to the
   matching __builtin_ia32_scattersiv* builtin.  The unmasked forms pass
   the constant mask 0xFF; the "mask" forms pass MASK.

   Every macro argument is parenthesized before a cast is applied to it
   so that an expression argument such as "base + 1" is cast as a whole
   rather than having the cast bind to its first operand only.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v8si)(__m256i)(INDEX), \
      (__v8sf)(__m256)(V1), \
      (int)(SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v8si)(__m256i)(INDEX), \
      (__v8sf)(__m256)(V1), \
      (int)(SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i)(INDEX), \
      (__v4sf)(__m128)(V1), \
      (int)(SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4si)(__m128i)(INDEX), \
      (__v4sf)(__m128)(V1), \
      (int)(SCALE))

#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i)(INDEX), \
      (__v4df)(__m256d)(V1), \
      (int)(SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4si)(__m128i)(INDEX), \
      (__v4df)(__m256d)(V1), \
      (int)(SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i)(INDEX), \
      (__v2df)(__m128d)(V1), \
      (int)(SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4si)(__m128i)(INDEX), \
      (__v2df)(__m128d)(V1), \
      (int)(SCALE))
13109
/* Scatter macros for float/double elements with 64-bit indices.  Each
   forwards ADDR (as void *), a mask, INDEX, V1 and the int SCALE to the
   matching __builtin_ia32_scatterdiv* builtin.  The unmasked forms pass
   the constant mask 0xFF; the "mask" forms pass MASK.

   Every macro argument is parenthesized before a cast is applied to it
   so that an expression argument such as "base + 1" is cast as a whole
   rather than having the cast bind to its first operand only.  */
#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4di)(__m256i)(INDEX), \
      (__v4sf)(__m128)(V1), \
      (int)(SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4di)(__m256i)(INDEX), \
      (__v4sf)(__m128)(V1), \
      (int)(SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v2di)(__m128i)(INDEX), \
      (__v4sf)(__m128)(V1), \
      (int)(SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v2di)(__m128i)(INDEX), \
      (__v4sf)(__m128)(V1), \
      (int)(SCALE))

#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4di)(__m256i)(INDEX), \
      (__v4df)(__m256d)(V1), \
      (int)(SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4di)(__m256i)(INDEX), \
      (__v4df)(__m256d)(V1), \
      (int)(SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v2di)(__m128i)(INDEX), \
      (__v2df)(__m128d)(V1), \
      (int)(SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v2di)(__m128i)(INDEX), \
      (__v2df)(__m128d)(V1), \
      (int)(SCALE))
13149
/* Scatter macros for 32- and 64-bit integer elements with 32-bit
   indices.  Each forwards ADDR (as void *), a mask, INDEX, V1 and the
   int SCALE to the matching __builtin_ia32_scattersiv* builtin.  The
   unmasked forms pass the constant mask 0xFF; the "mask" forms pass
   MASK.

   Every macro argument is parenthesized before a cast is applied to it
   so that an expression argument such as "base + 1" is cast as a whole
   rather than having the cast bind to its first operand only.  */
#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v8si)(__m256i)(INDEX), \
      (__v8si)(__m256i)(V1), \
      (int)(SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v8si)(__m256i)(INDEX), \
      (__v8si)(__m256i)(V1), \
      (int)(SCALE))

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i)(INDEX), \
      (__v4si)(__m128i)(V1), \
      (int)(SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4si)(__m128i)(INDEX), \
      (__v4si)(__m128i)(V1), \
      (int)(SCALE))

#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i)(INDEX), \
      (__v4di)(__m256i)(V1), \
      (int)(SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4si)(__m128i)(INDEX), \
      (__v4di)(__m256i)(V1), \
      (int)(SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4si)(__m128i)(INDEX), \
      (__v2di)(__m128i)(V1), \
      (int)(SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4si)(__m128i)(INDEX), \
      (__v2di)(__m128i)(V1), \
      (int)(SCALE))
13189
/* Scatter macros for 32- and 64-bit integer elements with 64-bit
   indices.  Each forwards ADDR (as void *), a mask, INDEX, V1 and the
   int SCALE to the matching __builtin_ia32_scatterdiv* builtin.  The
   unmasked forms pass the constant mask 0xFF; the "mask" forms pass
   MASK.

   Every macro argument is parenthesized before a cast is applied to it
   so that an expression argument such as "base + 1" is cast as a whole
   rather than having the cast bind to its first operand only.  */
#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4di)(__m256i)(INDEX), \
      (__v4si)(__m128i)(V1), \
      (int)(SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4di)(__m256i)(INDEX), \
      (__v4si)(__m128i)(V1), \
      (int)(SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v2di)(__m128i)(INDEX), \
      (__v4si)(__m128i)(V1), \
      (int)(SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v2di)(__m128i)(INDEX), \
      (__v4si)(__m128i)(V1), \
      (int)(SCALE))

#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v4di)(__m256i)(INDEX), \
      (__v4di)(__m256i)(V1), \
      (int)(SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v4di)(__m256i)(INDEX), \
      (__v4di)(__m256i)(V1), \
      (int)(SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ( \
      (void *)(ADDR), \
      (__mmask8)0xFF, \
      (__v2di)(__m128i)(INDEX), \
      (__v2di)(__m128i)(V1), \
      (int)(SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ( \
      (void *)(ADDR), \
      (__mmask8)(MASK), \
      (__v2di)(__m128i)(INDEX), \
      (__v2di)(__m128i)(V1), \
      (int)(SCALE))
13229
/* Masked 32-bit integer shuffles, macro forms.  Each forwards X and
   the int selector C to the matching __builtin_ia32_pshufd*_mask
   builtin with a third vector operand and the mask U: the "mask" forms
   pass W, the "maskz" forms pass the result of the corresponding
   setzero intrinsic.  */
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(X), \
      (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(X), \
      (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13250
/* rol_epi64 macros.  Each forwards A and the int immediate B to
   __builtin_ia32_prolq{256,128}_mask with a third vector operand and a
   mask.  The unmasked forms pass a setzero vector and an all-ones
   mask; the "mask" forms pass W and U; the "maskz" forms pass a
   setzero vector and U.  */
#define _mm256_rol_epi64(A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13280
/* ror_epi64 macros.  Each forwards A and the int immediate B to
   __builtin_ia32_prorq{256,128}_mask with a third vector operand and a
   mask.  The unmasked forms pass a setzero vector and an all-ones
   mask; the "mask" forms pass W and U; the "maskz" forms pass a
   setzero vector and U.  */
#define _mm256_ror_epi64(A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13310
/* rol_epi32 macros.  Each forwards A and the int immediate B to
   __builtin_ia32_prold{256,128}_mask with a third vector operand and a
   mask.  The unmasked forms pass a setzero vector and an all-ones
   mask; the "mask" forms pass W and U; the "maskz" forms pass a
   setzero vector and U.  */
#define _mm256_rol_epi32(A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi32(A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13340
/* ror_epi32 macros.  Each forwards A and the int immediate B to
   __builtin_ia32_prord{256,128}_mask with a third vector operand and a
   mask.  The unmasked forms pass a setzero vector and an all-ones
   mask; the "mask" forms pass W and U; the "maskz" forms pass a
   setzero vector and U.  */
#define _mm256_ror_epi32(A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))

#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi32(A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))

#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13371
/* Unmasked alignr on 32-bit elements of 256-bit vectors: forwards X, Y
   and the int immediate C to __builtin_ia32_alignd256_mask with an
   all-ones mask.  The fourth operand is a setzero vector rather than a
   second copy of X, so that X is expanded (and therefore evaluated)
   only once; the sibling unmasked rol/ror macros pass a setzero vector
   in the same way.  */
#define _mm256_alignr_epi32(X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))
13375
/* Masked alignr on 32-bit elements of 256-bit vectors.  Both forward
   X, Y and the int immediate C to __builtin_ia32_alignd256_mask with
   the mask U; the "mask" form passes W as the fourth operand, the
   "maskz" form passes the result of _mm256_setzero_si256 ().  */
#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))
13384
/* Unmasked alignr on 64-bit elements of 256-bit vectors: forwards X, Y
   and the int immediate C to __builtin_ia32_alignq256_mask with an
   all-ones mask.  The fourth operand is a setzero vector rather than a
   second copy of X, so that X is expanded (and therefore evaluated)
   only once; the sibling unmasked rol/ror macros pass a setzero vector
   in the same way.  */
#define _mm256_alignr_epi64(X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)-1))
13388
/* Masked alignr on 64-bit elements of 256-bit vectors.  Both forward
   X, Y and the int immediate C to __builtin_ia32_alignq256_mask with
   the mask U; the "mask" form passes W as the fourth operand, the
   "maskz" form passes the result of _mm256_setzero_si256 ().  */
#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(C), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))
13397
/* Unmasked alignr on 32-bit elements of 128-bit vectors: forwards X, Y
   and the int immediate C to __builtin_ia32_alignd128_mask with an
   all-ones mask.  The fourth operand is a setzero vector rather than a
   second copy of X, so that X is expanded (and therefore evaluated)
   only once; the sibling unmasked rol/ror macros pass a setzero vector
   in the same way.  */
#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))
13401
/* Masked alignr on 32-bit elements of 128-bit vectors.  Both forward
   X, Y and the int immediate C to __builtin_ia32_alignd128_mask with
   the mask U; the "mask" form passes W as the fourth operand, the
   "maskz" form passes the result of _mm_setzero_si128 ().  */
#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13410
/* Unmasked alignr on 64-bit elements of 128-bit vectors: forwards X, Y
   and the int immediate C to __builtin_ia32_alignq128_mask with an
   all-ones mask.  The fourth operand is a setzero vector rather than a
   second copy of X, so that X is expanded (and therefore evaluated)
   only once; the sibling unmasked rol/ror macros pass a setzero vector
   in the same way.  */
#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(C), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)-1))
13414
/* Masked 128-bit wrapper around __builtin_ia32_alignq128_mask.  X and Y
   are viewed as __v2di and C is passed as an int.  W is forwarded as the
   fourth builtin operand and U as the mask, matching the other
   *_mask_alignr_* macros in this header.  Previously W and U were
   ignored (X and (__mmask8) -1 were passed instead), which made this
   macro expand exactly like the unmasked _mm_alignr_epi64.  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ((__v2di) (__m128i) (X), \
                                            (__v2di) (__m128i) (Y), \
                                            (int) (C), \
                                            (__v2di) (__m128i) (W), \
                                            (__mmask8) (U)))
13418
/* Maskz 128-bit wrapper around __builtin_ia32_alignq128_mask: X and Y are
   viewed as __v2di, C is passed as an int, the fourth operand is the
   result of _mm_setzero_si128 () and U is passed as the mask.  */
#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
     (__v2di) (__m128i) (X), \
     (__v2di) (__m128i) (Y), \
     (int) (C), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13423
/* 128-bit wrappers around __builtin_ia32_vcvtps2ph_mask.  A is viewed as
   __v4sf and I is passed as an int.  The mask form passes W (viewed as
   __v8hi) and U; the maskz form passes the result of
   _mm_setzero_si128 () and U.
   A is parenthesized in the expansion so that the casts apply to the
   whole argument expression rather than only to its first operand.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) (__m128) (A), \
                                            (int) (I), \
                                            (__v8hi) (__m128i) (W), \
                                            (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
     (__v4sf) (__m128) (A), \
     (int) (I), \
     (__v8hi) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
936c0fe4
AI
13431
/* 256-bit-source wrappers around __builtin_ia32_vcvtps2ph256_mask.  A is
   viewed as __v8sf and I is passed as an int; the result is cast to
   __m128i.  The mask form passes W (viewed as __v8hi) and U; the maskz
   form passes the result of _mm_setzero_si128 () and U.
   A is parenthesized in the expansion so that the casts apply to the
   whole argument expression rather than only to its first operand.  */
#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) (__m256) (A), \
                                               (int) (I), \
                                               (__v8hi) (__m128i) (W), \
                                               (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
     (__v8sf) (__m256) (A), \
     (int) (I), \
     (__v8hi) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
936c0fe4
AI
13439
/* 256-bit wrappers around __builtin_ia32_psradi256_mask.  A is viewed as
   __v8si and B is passed as an int.  The mask form passes W and U; the
   maskz form passes the result of _mm256_setzero_si256 () and U.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si) (__m256i) (A), \
                                            (int) (B), \
                                            (__v8si) (__m256i) (W), \
                                            (__mmask8) (U)))

#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ((__v8si) (__m256i) (A), \
                                            (int) (B), \
                                            (__v8si) _mm256_setzero_si256 (), \
                                            (__mmask8) (U)))
936c0fe4
AI
13447
/* 128-bit wrappers around __builtin_ia32_psradi128_mask.  A is viewed as
   __v4si and B is passed as an int.  The mask form passes W and U; the
   maskz form passes the result of _mm_setzero_si128 () and U.  */
#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si) (__m128i) (A), \
                                            (int) (B), \
                                            (__v4si) (__m128i) (W), \
                                            (__mmask8) (U)))

#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ((__v4si) (__m128i) (A), \
                                            (int) (B), \
                                            (__v4si) _mm_setzero_si128 (), \
                                            (__mmask8) (U)))
936c0fe4
AI
13455
/* 256-bit wrappers around __builtin_ia32_psraqi256_mask.  A is viewed as
   __v4di and B is passed as an int.  The plain form passes a zero vector
   with a (__mmask8) -1 mask, the mask form passes W and U, and the maskz
   form passes a zero vector and U.  */
#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di) (__m256i) (A), \
                                            (int) (B), \
                                            (__v4di) _mm256_setzero_si256 (), \
                                            (__mmask8) -1))

#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di) (__m256i) (A), \
                                            (int) (B), \
                                            (__v4di) (__m256i) (W), \
                                            (__mmask8) (U)))

#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di) (__m256i) (A), \
                                            (int) (B), \
                                            (__v4di) _mm256_setzero_si256 (), \
                                            (__mmask8) (U)))
13467
/* 128-bit wrappers around __builtin_ia32_psraqi128_mask.  A is viewed as
   __v2di and B is passed as an int.  The plain form passes a zero vector
   with a (__mmask8) -1 mask, the mask form passes W and U, and the maskz
   form passes a zero vector and U.  */
#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di) (__m128i) (A), \
                                            (int) (B), \
                                            (__v2di) _mm_setzero_si128 (), \
                                            (__mmask8) -1))

#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di) (__m128i) (A), \
                                            (int) (B), \
                                            (__v2di) (__m128i) (W), \
                                            (__mmask8) (U)))

#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di) (__m128i) (A), \
                                            (int) (B), \
                                            (__v2di) _mm_setzero_si128 (), \
                                            (__mmask8) (U)))
936c0fe4
AI
13479
/* 256-bit wrappers around __builtin_ia32_permdf256_mask.  A is viewed as
   __v4df and B is passed as an int.  The mask form passes W and U; the
   maskz form passes the result of _mm256_setzero_pd () and U.  */
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df) (__m256d) (A), \
                                            (int) (B), \
                                            (__v4df) (__m256d) (W), \
                                            (__mmask8) (U)))

#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df) (__m256d) (A), \
     (int) (B), \
     (__v4df) (__m256d) _mm256_setzero_pd (), \
     (__mmask8) (U)))
936c0fe4
AI
13487
/* 256-bit wrappers around __builtin_ia32_vpermilpd256_mask.  X is viewed
   as __v4df and C is passed as an int.  The mask form passes W and U;
   the maskz form passes the result of _mm256_setzero_pd () and U.  */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) (__m256d) (X), \
                                               (int) (C), \
                                               (__v4df) (__m256d) (W), \
                                               (__mmask8) (U)))

#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
     (__v4df) (__m256d) (X), \
     (int) (C), \
     (__v4df) (__m256d) _mm256_setzero_pd (), \
     (__mmask8) (U)))
13497
/* 256-bit wrappers around __builtin_ia32_vpermilps256_mask.  X is viewed
   as __v8sf and C is passed as an int.  The mask form passes W and U;
   the maskz form passes the result of _mm256_setzero_ps () and U.  */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) (__m256) (X), \
                                              (int) (C), \
                                              (__v8sf) (__m256) (W), \
                                              (__mmask8) (U)))

#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
     (__v8sf) (__m256) (X), \
     (int) (C), \
     (__v8sf) (__m256) _mm256_setzero_ps (), \
     (__mmask8) (U)))
13506
/* 128-bit wrappers around __builtin_ia32_vpermilpd_mask.  X is viewed as
   __v2df and C is passed as an int.  The mask form passes W and U; the
   maskz form passes the result of _mm_setzero_pd () and U.  */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) (__m128d) (X), \
                                            (int) (C), \
                                            (__v2df) (__m128d) (W), \
                                            (__mmask8) (U)))

#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
     (__v2df) (__m128d) (X), \
     (int) (C), \
     (__v2df) (__m128d) _mm_setzero_pd (), \
     (__mmask8) (U)))
13515
/* 128-bit wrappers around __builtin_ia32_vpermilps_mask.  X is viewed as
   __v4sf and C is passed as an int.  The mask form passes W and U; the
   maskz form passes the result of _mm_setzero_ps () and U.  */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf) (__m128) (X), \
                                           (int) (C), \
                                           (__v4sf) (__m128) (W), \
                                           (__mmask8) (U)))

#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
     (__v4sf) (__m128) (X), \
     (int) (C), \
     (__v4sf) (__m128) _mm_setzero_ps (), \
     (__mmask8) (U)))
13524
/* 256-bit blend macros.  Each one hands __A and __W (cast to the element
   vector type of the variant) and the mask __U (cast to __mmask8) to the
   corresponding __builtin_ia32_blendm*_256_mask builtin, in that
   order.  */
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
                                               (__v4df) (__W), \
                                               (__mmask8) (__U)))

#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
                                              (__v8sf) (__W), \
                                              (__mmask8) (__U)))

#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
                                              (__v4di) (__W), \
                                              (__mmask8) (__U)))

#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
                                              (__v8si) (__W), \
                                              (__mmask8) (__U)))
13544
/* 128-bit blend macros.  Each one hands __A and __W (cast to the element
   vector type of the variant) and the mask __U (cast to __mmask8) to the
   corresponding __builtin_ia32_blendm*_128_mask builtin, in that
   order.  */
#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
                                               (__v2df) (__W), \
                                               (__mmask8) (__U)))

#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
                                              (__v4sf) (__W), \
                                              (__mmask8) (__U)))

#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
                                              (__v2di) (__W), \
                                              (__mmask8) (__U)))

#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
                                              (__v4si) (__W), \
                                              (__mmask8) (__U)))
13564
/* Unmasked 256-bit comparison macros.  Each one forwards X, Y and the
   predicate P (as an int) to the matching comparison builtin with a
   constant (__mmask8) -1 mask and yields the builtin's result cast to
   __mmask8.  */
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) (__m256i) (X), \
                                            (__v8si) (__m256i) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1))

#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) (__m256i) (X), \
                                           (__v4di) (__m256i) (Y), \
                                           (int) (P), \
                                           (__mmask8) -1))

#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) (__m256i) (X), \
                                           (__v8si) (__m256i) (Y), \
                                           (int) (P), \
                                           (__mmask8) -1))

#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) (__m256i) (X), \
                                            (__v4di) (__m256i) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1))

#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) (__m256d) (X), \
                                            (__v4df) (__m256d) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1))

#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) (__m256) (X), \
                                            (__v8sf) (__m256) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1))
13594
/* Masked 256-bit comparison macros.  Each one forwards X, Y and the
   predicate P (as an int) to the matching comparison builtin, passes M
   (cast to __mmask8) as the mask operand, and yields the builtin's
   result cast to __mmask8.  */
#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) (__m256i) (X), \
                                           (__v4di) (__m256i) (Y), \
                                           (int) (P), \
                                           (__mmask8) (M)))

#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) (__m256i) (X), \
                                           (__v8si) (__m256i) (Y), \
                                           (int) (P), \
                                           (__mmask8) (M)))

#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) (__m256i) (X), \
                                            (__v4di) (__m256i) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M)))

#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) (__m256i) (X), \
                                            (__v8si) (__m256i) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M)))

#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) (__m256d) (X), \
                                            (__v4df) (__m256d) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M)))

#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) (__m256) (X), \
                                            (__v8sf) (__m256) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M)))
13624
/* Unmasked 128-bit comparison macros.  Each one forwards X, Y and the
   predicate P (as an int) to the matching comparison builtin with a
   constant (__mmask8) -1 mask and yields the builtin's result cast to
   __mmask8.  */
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) (__m128i) (X), \
                                           (__v2di) (__m128i) (Y), \
                                           (int) (P), \
                                           (__mmask8) -1))

#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) (__m128i) (X), \
                                           (__v4si) (__m128i) (Y), \
                                           (int) (P), \
                                           (__mmask8) -1))

#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) (__m128i) (X), \
                                            (__v2di) (__m128i) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1))

#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) (__m128i) (X), \
                                            (__v4si) (__m128i) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1))

#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) (__m128d) (X), \
                                            (__v2df) (__m128d) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1))

#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) (__m128) (X), \
                                            (__v4sf) (__m128) (Y), \
                                            (int) (P), \
                                            (__mmask8) -1))
13654
/* Masked 128-bit comparison macros.  Each one forwards X, Y and the
   predicate P (as an int) to the matching comparison builtin, passes M
   (cast to __mmask8) as the mask operand, and yields the builtin's
   result cast to __mmask8.  */
#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) (__m128i) (X), \
                                           (__v2di) (__m128i) (Y), \
                                           (int) (P), \
                                           (__mmask8) (M)))

#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) (__m128i) (X), \
                                           (__v4si) (__m128i) (Y), \
                                           (int) (P), \
                                           (__mmask8) (M)))

#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) (__m128i) (X), \
                                            (__v2di) (__m128i) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M)))

#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) (__m128i) (X), \
                                            (__v4si) (__m128i) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M)))

#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) (__m128d) (X), \
                                            (__v2df) (__m128d) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M)))

#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) (__m128) (X), \
                                            (__v4sf) (__m128) (Y), \
                                            (int) (P), \
                                            (__mmask8) (M)))
13684
13685#endif
13686
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped:
   B is passed first and A second.  */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps ((B), (A))
936c0fe4
AI
13688
/* If the top of this header had to push target options and define
   __DISABLE_AVX512VL__ (because __AVX512VL__ was not defined), drop the
   marker and restore the previous options.  */
#if defined (__DISABLE_AVX512VL__)
#undef __DISABLE_AVX512VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VL__ */
13693
13694#endif /* _AVX512VLINTRIN_H_INCLUDED */