]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512vlintrin.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / avx512vlintrin.h
CommitLineData
/* Copyright (C) 2014-2020 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLINTRIN_H_INCLUDED
29#define _AVX512VLINTRIN_H_INCLUDED
30
936c0fe4
AI
31#ifndef __AVX512VL__
32#pragma GCC push_options
33#pragma GCC target("avx512vl")
34#define __DISABLE_AVX512VL__
35#endif /* __AVX512VL__ */
36
37/* Internal data types for implementing the intrinsics. */
38typedef unsigned int __mmask32;
39
40extern __inline __m256d
41__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
42_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
43{
44 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
45 (__v4df) __W,
46 (__mmask8) __U);
47}
48
49extern __inline __m256d
50__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
51_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
52{
53 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
54 (__v4df)
55 _mm256_setzero_pd (),
56 (__mmask8) __U);
57}
58
59extern __inline __m128d
60__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
62{
63 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
64 (__v2df) __W,
65 (__mmask8) __U);
66}
67
68extern __inline __m128d
69__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
71{
72 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
73 (__v2df)
74 _mm_setzero_pd (),
75 (__mmask8) __U);
76}
77
78extern __inline __m256d
79__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
80_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
81{
82 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
83 (__v4df) __W,
84 (__mmask8) __U);
85}
86
87extern __inline __m256d
88__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
89_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
90{
91 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
92 (__v4df)
93 _mm256_setzero_pd (),
94 (__mmask8) __U);
95}
96
97extern __inline __m128d
98__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
99_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
100{
101 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
102 (__v2df) __W,
103 (__mmask8) __U);
104}
105
106extern __inline __m128d
107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108_mm_maskz_load_pd (__mmask8 __U, void const *__P)
109{
110 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
111 (__v2df)
112 _mm_setzero_pd (),
113 (__mmask8) __U);
114}
115
116extern __inline void
117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
118_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
119{
120 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
121 (__v4df) __A,
122 (__mmask8) __U);
123}
124
125extern __inline void
126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
128{
129 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
130 (__v2df) __A,
131 (__mmask8) __U);
132}
133
134extern __inline __m256
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
137{
138 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
139 (__v8sf) __W,
140 (__mmask8) __U);
141}
142
143extern __inline __m256
144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
146{
147 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
148 (__v8sf)
149 _mm256_setzero_ps (),
150 (__mmask8) __U);
151}
152
153extern __inline __m128
154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
156{
157 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
158 (__v4sf) __W,
159 (__mmask8) __U);
160}
161
162extern __inline __m128
163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
165{
166 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
167 (__v4sf)
168 _mm_setzero_ps (),
169 (__mmask8) __U);
170}
171
172extern __inline __m256
173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
175{
176 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
177 (__v8sf) __W,
178 (__mmask8) __U);
179}
180
181extern __inline __m256
182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
184{
185 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
186 (__v8sf)
187 _mm256_setzero_ps (),
188 (__mmask8) __U);
189}
190
191extern __inline __m128
192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
194{
195 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
196 (__v4sf) __W,
197 (__mmask8) __U);
198}
199
200extern __inline __m128
201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202_mm_maskz_load_ps (__mmask8 __U, void const *__P)
203{
204 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
205 (__v4sf)
206 _mm_setzero_ps (),
207 (__mmask8) __U);
208}
209
210extern __inline void
211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
213{
214 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
215 (__v8sf) __A,
216 (__mmask8) __U);
217}
218
219extern __inline void
220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
222{
223 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
224 (__v4sf) __A,
225 (__mmask8) __U);
226}
227
228extern __inline __m256i
229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
231{
232 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
233 (__v4di) __W,
234 (__mmask8) __U);
235}
236
237extern __inline __m256i
238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
240{
241 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
242 (__v4di)
243 _mm256_setzero_si256 (),
244 (__mmask8) __U);
245}
246
247extern __inline __m128i
248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
249_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
250{
251 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
252 (__v2di) __W,
253 (__mmask8) __U);
254}
255
256extern __inline __m128i
257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
259{
260 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
261 (__v2di)
a25a7887 262 _mm_setzero_si128 (),
936c0fe4
AI
263 (__mmask8) __U);
264}
265
266extern __inline __m256i
267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
269{
270 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
271 (__v4di) __W,
272 (__mmask8)
273 __U);
274}
275
276extern __inline __m256i
277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
278_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
279{
280 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
281 (__v4di)
282 _mm256_setzero_si256 (),
283 (__mmask8)
284 __U);
285}
286
287extern __inline __m128i
288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
290{
291 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
292 (__v2di) __W,
293 (__mmask8)
294 __U);
295}
296
297extern __inline __m128i
298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
299_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
300{
301 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
302 (__v2di)
a25a7887 303 _mm_setzero_si128 (),
936c0fe4
AI
304 (__mmask8)
305 __U);
306}
307
308extern __inline void
309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
311{
312 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
313 (__v4di) __A,
314 (__mmask8) __U);
315}
316
317extern __inline void
318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
320{
321 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
322 (__v2di) __A,
323 (__mmask8) __U);
324}
325
326extern __inline __m256i
327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
329{
330 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
331 (__v8si) __W,
332 (__mmask8) __U);
333}
334
335extern __inline __m256i
336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
338{
339 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
340 (__v8si)
341 _mm256_setzero_si256 (),
342 (__mmask8) __U);
343}
344
345extern __inline __m128i
346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
348{
349 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
350 (__v4si) __W,
351 (__mmask8) __U);
352}
353
354extern __inline __m128i
355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
357{
358 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
359 (__v4si)
360 _mm_setzero_si128 (),
361 (__mmask8) __U);
362}
363
364extern __inline __m256i
365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
366_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
367{
368 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
369 (__v8si) __W,
370 (__mmask8)
371 __U);
372}
373
374extern __inline __m256i
375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
376_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
377{
378 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
379 (__v8si)
380 _mm256_setzero_si256 (),
381 (__mmask8)
382 __U);
383}
384
385extern __inline __m128i
386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
388{
389 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
390 (__v4si) __W,
391 (__mmask8)
392 __U);
393}
394
395extern __inline __m128i
396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
398{
399 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
400 (__v4si)
401 _mm_setzero_si128 (),
402 (__mmask8)
403 __U);
404}
405
406extern __inline void
407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
409{
410 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
411 (__v8si) __A,
412 (__mmask8) __U);
413}
414
415extern __inline void
416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
418{
419 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
420 (__v4si) __A,
421 (__mmask8) __U);
422}
423
936c0fe4
AI
424extern __inline __m128d
425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
427{
428 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
429 (__v2df) __B,
430 (__v2df) __W,
431 (__mmask8) __U);
432}
433
434extern __inline __m128d
435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
437{
438 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
439 (__v2df) __B,
440 (__v2df)
441 _mm_setzero_pd (),
442 (__mmask8) __U);
443}
444
445extern __inline __m256d
446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
448 __m256d __B)
449{
450 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
451 (__v4df) __B,
452 (__v4df) __W,
453 (__mmask8) __U);
454}
455
456extern __inline __m256d
457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
459{
460 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
461 (__v4df) __B,
462 (__v4df)
463 _mm256_setzero_pd (),
464 (__mmask8) __U);
465}
466
467extern __inline __m128
468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 469_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
470{
471 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
472 (__v4sf) __B,
473 (__v4sf) __W,
474 (__mmask8) __U);
475}
476
477extern __inline __m128
478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 479_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
480{
481 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
482 (__v4sf) __B,
483 (__v4sf)
484 _mm_setzero_ps (),
485 (__mmask8) __U);
486}
487
488extern __inline __m256
489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 490_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
491{
492 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
493 (__v8sf) __B,
494 (__v8sf) __W,
495 (__mmask8) __U);
496}
497
498extern __inline __m256
499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 500_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
501{
502 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
503 (__v8sf) __B,
504 (__v8sf)
505 _mm256_setzero_ps (),
506 (__mmask8) __U);
507}
508
509extern __inline __m128d
510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
512{
513 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
514 (__v2df) __B,
515 (__v2df) __W,
516 (__mmask8) __U);
517}
518
519extern __inline __m128d
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
522{
523 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
524 (__v2df) __B,
525 (__v2df)
526 _mm_setzero_pd (),
527 (__mmask8) __U);
528}
529
530extern __inline __m256d
531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
532_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
533 __m256d __B)
534{
535 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
536 (__v4df) __B,
537 (__v4df) __W,
538 (__mmask8) __U);
539}
540
541extern __inline __m256d
542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
544{
545 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
546 (__v4df) __B,
547 (__v4df)
548 _mm256_setzero_pd (),
549 (__mmask8) __U);
550}
551
552extern __inline __m128
553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 554_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
555{
556 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
557 (__v4sf) __B,
558 (__v4sf) __W,
559 (__mmask8) __U);
560}
561
562extern __inline __m128
563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 564_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
936c0fe4
AI
565{
566 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
567 (__v4sf) __B,
568 (__v4sf)
569 _mm_setzero_ps (),
570 (__mmask8) __U);
571}
572
573extern __inline __m256
574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 575_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
576{
577 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
578 (__v8sf) __B,
579 (__v8sf) __W,
580 (__mmask8) __U);
581}
582
583extern __inline __m256
584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 585_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
936c0fe4
AI
586{
587 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
588 (__v8sf) __B,
589 (__v8sf)
590 _mm256_setzero_ps (),
591 (__mmask8) __U);
592}
593
594extern __inline void
595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596_mm256_store_epi64 (void *__P, __m256i __A)
597{
598 *(__m256i *) __P = __A;
599}
600
601extern __inline void
602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
603_mm_store_epi64 (void *__P, __m128i __A)
604{
605 *(__m128i *) __P = __A;
606}
607
608extern __inline __m256d
609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
610_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
611{
fc9cf6da 612 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
613 (__v4df) __W,
614 (__mmask8) __U);
615}
616
617extern __inline __m256d
618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
619_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
620{
fc9cf6da 621 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
622 (__v4df)
623 _mm256_setzero_pd (),
624 (__mmask8) __U);
625}
626
627extern __inline __m128d
628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
630{
fc9cf6da 631 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
632 (__v2df) __W,
633 (__mmask8) __U);
634}
635
636extern __inline __m128d
637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
638_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
639{
fc9cf6da 640 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
641 (__v2df)
642 _mm_setzero_pd (),
643 (__mmask8) __U);
644}
645
646extern __inline void
647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
648_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
649{
fc9cf6da 650 __builtin_ia32_storeupd256_mask ((double *) __P,
936c0fe4
AI
651 (__v4df) __A,
652 (__mmask8) __U);
653}
654
655extern __inline void
656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
658{
fc9cf6da 659 __builtin_ia32_storeupd128_mask ((double *) __P,
936c0fe4
AI
660 (__v2df) __A,
661 (__mmask8) __U);
662}
663
664extern __inline __m256
665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
667{
fc9cf6da 668 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
669 (__v8sf) __W,
670 (__mmask8) __U);
671}
672
673extern __inline __m256
674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
676{
fc9cf6da 677 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
678 (__v8sf)
679 _mm256_setzero_ps (),
680 (__mmask8) __U);
681}
682
683extern __inline __m128
684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
686{
fc9cf6da 687 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
688 (__v4sf) __W,
689 (__mmask8) __U);
690}
691
692extern __inline __m128
693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
695{
fc9cf6da 696 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
697 (__v4sf)
698 _mm_setzero_ps (),
699 (__mmask8) __U);
700}
701
702extern __inline void
703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
704_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
705{
fc9cf6da 706 __builtin_ia32_storeups256_mask ((float *) __P,
936c0fe4
AI
707 (__v8sf) __A,
708 (__mmask8) __U);
709}
710
711extern __inline void
712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
714{
fc9cf6da 715 __builtin_ia32_storeups128_mask ((float *) __P,
936c0fe4
AI
716 (__v4sf) __A,
717 (__mmask8) __U);
718}
719
720extern __inline __m256i
721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
723{
fc9cf6da 724 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
725 (__v4di) __W,
726 (__mmask8) __U);
727}
728
729extern __inline __m256i
730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
732{
fc9cf6da 733 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
734 (__v4di)
735 _mm256_setzero_si256 (),
736 (__mmask8) __U);
737}
738
739extern __inline __m128i
740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
741_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
742{
fc9cf6da 743 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4
AI
744 (__v2di) __W,
745 (__mmask8) __U);
746}
747
748extern __inline __m128i
749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
750_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
751{
fc9cf6da 752 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4 753 (__v2di)
a25a7887 754 _mm_setzero_si128 (),
936c0fe4
AI
755 (__mmask8) __U);
756}
757
4c98bdad
SP
758extern __inline void
759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760_mm256_storeu_epi64 (void *__P, __m256i __A)
761{
762 *(__m256i_u *) __P = (__m256i_u) __A;
763}
764
936c0fe4
AI
765extern __inline void
766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
768{
fc9cf6da 769 __builtin_ia32_storedqudi256_mask ((long long *) __P,
936c0fe4
AI
770 (__v4di) __A,
771 (__mmask8) __U);
772}
773
4c98bdad
SP
774extern __inline void
775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
776_mm_storeu_epi64 (void *__P, __m128i __A)
777{
778 *(__m128i_u *) __P = (__m128i_u) __A;
779}
780
936c0fe4
AI
781extern __inline void
782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
783_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
784{
fc9cf6da 785 __builtin_ia32_storedqudi128_mask ((long long *) __P,
936c0fe4
AI
786 (__v2di) __A,
787 (__mmask8) __U);
788}
789
790extern __inline __m256i
791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
792_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
793{
fc9cf6da 794 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
795 (__v8si) __W,
796 (__mmask8) __U);
797}
798
799extern __inline __m256i
800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
801_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
802{
fc9cf6da 803 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
804 (__v8si)
805 _mm256_setzero_si256 (),
806 (__mmask8) __U);
807}
808
809extern __inline __m128i
810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
811_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
812{
fc9cf6da 813 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
814 (__v4si) __W,
815 (__mmask8) __U);
816}
817
818extern __inline __m128i
819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
820_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
821{
fc9cf6da 822 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
823 (__v4si)
824 _mm_setzero_si128 (),
825 (__mmask8) __U);
826}
827
4c98bdad
SP
828extern __inline void
829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
830_mm256_storeu_epi32 (void *__P, __m256i __A)
831{
832 *(__m256i_u *) __P = (__m256i_u) __A;
833}
834
936c0fe4
AI
835extern __inline void
836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
837_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
838{
fc9cf6da 839 __builtin_ia32_storedqusi256_mask ((int *) __P,
936c0fe4
AI
840 (__v8si) __A,
841 (__mmask8) __U);
842}
843
4c98bdad
SP
844extern __inline void
845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
846_mm_storeu_epi32 (void *__P, __m128i __A)
847{
848 *(__m128i_u *) __P = (__m128i_u) __A;
849}
850
936c0fe4
AI
851extern __inline void
852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
854{
fc9cf6da 855 __builtin_ia32_storedqusi128_mask ((int *) __P,
936c0fe4
AI
856 (__v4si) __A,
857 (__mmask8) __U);
858}
859
860extern __inline __m256i
861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
862_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
863{
864 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
865 (__v8si) __W,
866 (__mmask8) __U);
867}
868
869extern __inline __m256i
870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
872{
873 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
874 (__v8si)
875 _mm256_setzero_si256 (),
876 (__mmask8) __U);
877}
878
879extern __inline __m128i
880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
881_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
882{
883 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
884 (__v4si) __W,
885 (__mmask8) __U);
886}
887
888extern __inline __m128i
889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
890_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
891{
892 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
893 (__v4si)
894 _mm_setzero_si128 (),
895 (__mmask8) __U);
896}
897
898extern __inline __m256i
899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900_mm256_abs_epi64 (__m256i __A)
901{
902 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
903 (__v4di)
904 _mm256_setzero_si256 (),
905 (__mmask8) -1);
906}
907
908extern __inline __m256i
909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
910_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
911{
912 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
913 (__v4di) __W,
914 (__mmask8) __U);
915}
916
917extern __inline __m256i
918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
919_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
920{
921 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
922 (__v4di)
923 _mm256_setzero_si256 (),
924 (__mmask8) __U);
925}
926
927extern __inline __m128i
928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
929_mm_abs_epi64 (__m128i __A)
930{
931 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
932 (__v2di)
a25a7887 933 _mm_setzero_si128 (),
936c0fe4
AI
934 (__mmask8) -1);
935}
936
937extern __inline __m128i
938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
939_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
940{
941 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
942 (__v2di) __W,
943 (__mmask8) __U);
944}
945
946extern __inline __m128i
947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
948_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
949{
950 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
951 (__v2di)
a25a7887 952 _mm_setzero_si128 (),
936c0fe4
AI
953 (__mmask8) __U);
954}
955
956extern __inline __m128i
957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
958_mm256_cvtpd_epu32 (__m256d __A)
959{
960 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
961 (__v4si)
962 _mm_setzero_si128 (),
963 (__mmask8) -1);
964}
965
966extern __inline __m128i
967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
968_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
969{
970 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
971 (__v4si) __W,
972 (__mmask8) __U);
973}
974
975extern __inline __m128i
976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
977_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
978{
979 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
980 (__v4si)
981 _mm_setzero_si128 (),
982 (__mmask8) __U);
983}
984
985extern __inline __m128i
986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
987_mm_cvtpd_epu32 (__m128d __A)
988{
989 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
990 (__v4si)
991 _mm_setzero_si128 (),
992 (__mmask8) -1);
993}
994
995extern __inline __m128i
996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
997_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
998{
999 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1000 (__v4si) __W,
1001 (__mmask8) __U);
1002}
1003
1004extern __inline __m128i
1005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1006_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
1007{
1008 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1009 (__v4si)
1010 _mm_setzero_si128 (),
1011 (__mmask8) __U);
1012}
1013
1014extern __inline __m256i
1015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1017{
1018 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1019 (__v8si) __W,
1020 (__mmask8) __U);
1021}
1022
1023extern __inline __m256i
1024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1025_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1026{
1027 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1028 (__v8si)
1029 _mm256_setzero_si256 (),
1030 (__mmask8) __U);
1031}
1032
1033extern __inline __m128i
1034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1035_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1036{
1037 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1038 (__v4si) __W,
1039 (__mmask8) __U);
1040}
1041
1042extern __inline __m128i
1043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1044_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1045{
1046 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1047 (__v4si)
1048 _mm_setzero_si128 (),
1049 (__mmask8) __U);
1050}
1051
1052extern __inline __m256i
1053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1054_mm256_cvttps_epu32 (__m256 __A)
1055{
1056 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1057 (__v8si)
1058 _mm256_setzero_si256 (),
1059 (__mmask8) -1);
1060}
1061
1062extern __inline __m256i
1063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1064_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1065{
1066 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1067 (__v8si) __W,
1068 (__mmask8) __U);
1069}
1070
1071extern __inline __m256i
1072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1073_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1074{
1075 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1076 (__v8si)
1077 _mm256_setzero_si256 (),
1078 (__mmask8) __U);
1079}
1080
1081extern __inline __m128i
1082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1083_mm_cvttps_epu32 (__m128 __A)
1084{
1085 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1086 (__v4si)
1087 _mm_setzero_si128 (),
1088 (__mmask8) -1);
1089}
1090
1091extern __inline __m128i
1092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1094{
1095 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1096 (__v4si) __W,
1097 (__mmask8) __U);
1098}
1099
1100extern __inline __m128i
1101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1102_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1103{
1104 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1105 (__v4si)
1106 _mm_setzero_si128 (),
1107 (__mmask8) __U);
1108}
1109
1110extern __inline __m128i
1111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1113{
1114 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1115 (__v4si) __W,
1116 (__mmask8) __U);
1117}
1118
1119extern __inline __m128i
1120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1121_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1122{
1123 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1124 (__v4si)
1125 _mm_setzero_si128 (),
1126 (__mmask8) __U);
1127}
1128
1129extern __inline __m128i
1130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1131_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1132{
1133 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1134 (__v4si) __W,
1135 (__mmask8) __U);
1136}
1137
1138extern __inline __m128i
1139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1140_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1141{
1142 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1143 (__v4si)
1144 _mm_setzero_si128 (),
1145 (__mmask8) __U);
1146}
1147
1148extern __inline __m128i
1149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1150_mm256_cvttpd_epu32 (__m256d __A)
1151{
1152 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1153 (__v4si)
1154 _mm_setzero_si128 (),
1155 (__mmask8) -1);
1156}
1157
1158extern __inline __m128i
1159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1160_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1161{
1162 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1163 (__v4si) __W,
1164 (__mmask8) __U);
1165}
1166
1167extern __inline __m128i
1168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1169_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1170{
1171 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1172 (__v4si)
1173 _mm_setzero_si128 (),
1174 (__mmask8) __U);
1175}
1176
1177extern __inline __m128i
1178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1179_mm_cvttpd_epu32 (__m128d __A)
1180{
1181 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1182 (__v4si)
1183 _mm_setzero_si128 (),
1184 (__mmask8) -1);
1185}
1186
1187extern __inline __m128i
1188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1189_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1190{
1191 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1192 (__v4si) __W,
1193 (__mmask8) __U);
1194}
1195
1196extern __inline __m128i
1197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1198_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1199{
1200 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1201 (__v4si)
1202 _mm_setzero_si128 (),
1203 (__mmask8) __U);
1204}
1205
1206extern __inline __m128i
1207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1209{
1210 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1211 (__v4si) __W,
1212 (__mmask8) __U);
1213}
1214
1215extern __inline __m128i
1216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1217_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1218{
1219 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1220 (__v4si)
1221 _mm_setzero_si128 (),
1222 (__mmask8) __U);
1223}
1224
1225extern __inline __m128i
1226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1228{
1229 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1230 (__v4si) __W,
1231 (__mmask8) __U);
1232}
1233
1234extern __inline __m128i
1235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1236_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1237{
1238 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1239 (__v4si)
1240 _mm_setzero_si128 (),
1241 (__mmask8) __U);
1242}
1243
1244extern __inline __m256d
1245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1246_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1247{
1248 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1249 (__v4df) __W,
1250 (__mmask8) __U);
1251}
1252
1253extern __inline __m256d
1254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1255_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1256{
1257 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1258 (__v4df)
1259 _mm256_setzero_pd (),
1260 (__mmask8) __U);
1261}
1262
1263extern __inline __m128d
1264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1265_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1266{
1267 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1268 (__v2df) __W,
1269 (__mmask8) __U);
1270}
1271
1272extern __inline __m128d
1273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1274_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1275{
1276 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1277 (__v2df)
1278 _mm_setzero_pd (),
1279 (__mmask8) __U);
1280}
1281
1282extern __inline __m256d
1283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1284_mm256_cvtepu32_pd (__m128i __A)
1285{
1286 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1287 (__v4df)
1288 _mm256_setzero_pd (),
1289 (__mmask8) -1);
1290}
1291
1292extern __inline __m256d
1293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1294_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1295{
1296 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1297 (__v4df) __W,
1298 (__mmask8) __U);
1299}
1300
1301extern __inline __m256d
1302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1303_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1304{
1305 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1306 (__v4df)
1307 _mm256_setzero_pd (),
1308 (__mmask8) __U);
1309}
1310
1311extern __inline __m128d
1312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1313_mm_cvtepu32_pd (__m128i __A)
1314{
1315 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1316 (__v2df)
1317 _mm_setzero_pd (),
1318 (__mmask8) -1);
1319}
1320
1321extern __inline __m128d
1322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1323_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1324{
1325 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1326 (__v2df) __W,
1327 (__mmask8) __U);
1328}
1329
1330extern __inline __m128d
1331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1332_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1333{
1334 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1335 (__v2df)
1336 _mm_setzero_pd (),
1337 (__mmask8) __U);
1338}
1339
1340extern __inline __m256
1341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1342_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1343{
1344 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1345 (__v8sf) __W,
1346 (__mmask8) __U);
1347}
1348
1349extern __inline __m256
1350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1351_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
936c0fe4
AI
1352{
1353 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1354 (__v8sf)
1355 _mm256_setzero_ps (),
1356 (__mmask8) __U);
1357}
1358
1359extern __inline __m128
1360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1361_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1362{
1363 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1364 (__v4sf) __W,
1365 (__mmask8) __U);
1366}
1367
1368extern __inline __m128
1369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1370_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
936c0fe4
AI
1371{
1372 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1373 (__v4sf)
1374 _mm_setzero_ps (),
1375 (__mmask8) __U);
1376}
1377
1378extern __inline __m256
1379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1380_mm256_cvtepu32_ps (__m256i __A)
1381{
1382 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1383 (__v8sf)
1384 _mm256_setzero_ps (),
1385 (__mmask8) -1);
1386}
1387
1388extern __inline __m256
1389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1390_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1391{
1392 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1393 (__v8sf) __W,
1394 (__mmask8) __U);
1395}
1396
1397extern __inline __m256
1398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1400{
1401 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1402 (__v8sf)
1403 _mm256_setzero_ps (),
1404 (__mmask8) __U);
1405}
1406
1407extern __inline __m128
1408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1409_mm_cvtepu32_ps (__m128i __A)
1410{
1411 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1412 (__v4sf)
1413 _mm_setzero_ps (),
1414 (__mmask8) -1);
1415}
1416
1417extern __inline __m128
1418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1419_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1420{
1421 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1422 (__v4sf) __W,
1423 (__mmask8) __U);
1424}
1425
1426extern __inline __m128
1427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1428_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1429{
1430 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1431 (__v4sf)
1432 _mm_setzero_ps (),
1433 (__mmask8) __U);
1434}
1435
1436extern __inline __m256d
1437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1439{
1440 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1441 (__v4df) __W,
1442 (__mmask8) __U);
1443}
1444
1445extern __inline __m256d
1446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1447_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1448{
1449 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1450 (__v4df)
1451 _mm256_setzero_pd (),
1452 (__mmask8) __U);
1453}
1454
1455extern __inline __m128d
1456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1457_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1458{
1459 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1460 (__v2df) __W,
1461 (__mmask8) __U);
1462}
1463
1464extern __inline __m128d
1465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1466_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1467{
1468 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1469 (__v2df)
1470 _mm_setzero_pd (),
1471 (__mmask8) __U);
1472}
1473
1474extern __inline __m128i
1475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1476_mm_cvtepi32_epi8 (__m128i __A)
1477{
1478 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
a25a7887
JJ
1479 (__v16qi)
1480 _mm_undefined_si128 (),
936c0fe4
AI
1481 (__mmask8) -1);
1482}
1483
1484extern __inline void
1485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1486_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1487{
1488 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1489}
1490
1491extern __inline __m128i
1492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1494{
1495 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1496 (__v16qi) __O, __M);
1497}
1498
1499extern __inline __m128i
1500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1502{
1503 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1504 (__v16qi)
1505 _mm_setzero_si128 (),
1506 __M);
1507}
1508
1509extern __inline __m128i
1510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1511_mm256_cvtepi32_epi8 (__m256i __A)
1512{
1513 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
a25a7887
JJ
1514 (__v16qi)
1515 _mm_undefined_si128 (),
936c0fe4
AI
1516 (__mmask8) -1);
1517}
1518
1519extern __inline __m128i
1520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1521_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1522{
1523 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1524 (__v16qi) __O, __M);
1525}
1526
1527extern __inline void
1528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1529_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1530{
1531 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1532}
1533
1534extern __inline __m128i
1535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1536_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1537{
1538 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1539 (__v16qi)
1540 _mm_setzero_si128 (),
1541 __M);
1542}
1543
1544extern __inline __m128i
1545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1546_mm_cvtsepi32_epi8 (__m128i __A)
1547{
1548 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
a25a7887
JJ
1549 (__v16qi)
1550 _mm_undefined_si128 (),
936c0fe4
AI
1551 (__mmask8) -1);
1552}
1553
1554extern __inline void
1555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1556_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1557{
1558 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1559}
1560
1561extern __inline __m128i
1562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1563_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1564{
1565 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1566 (__v16qi) __O, __M);
1567}
1568
1569extern __inline __m128i
1570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1571_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1572{
1573 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1574 (__v16qi)
1575 _mm_setzero_si128 (),
1576 __M);
1577}
1578
1579extern __inline __m128i
1580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1581_mm256_cvtsepi32_epi8 (__m256i __A)
1582{
1583 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
a25a7887
JJ
1584 (__v16qi)
1585 _mm_undefined_si128 (),
936c0fe4
AI
1586 (__mmask8) -1);
1587}
1588
1589extern __inline void
1590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1592{
1593 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1594}
1595
1596extern __inline __m128i
1597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1598_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1599{
1600 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1601 (__v16qi) __O, __M);
1602}
1603
1604extern __inline __m128i
1605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1606_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1607{
1608 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1609 (__v16qi)
1610 _mm_setzero_si128 (),
1611 __M);
1612}
1613
1614extern __inline __m128i
1615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1616_mm_cvtusepi32_epi8 (__m128i __A)
1617{
1618 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
a25a7887
JJ
1619 (__v16qi)
1620 _mm_undefined_si128 (),
936c0fe4
AI
1621 (__mmask8) -1);
1622}
1623
1624extern __inline void
1625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1627{
1628 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1629}
1630
1631extern __inline __m128i
1632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1633_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1634{
1635 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1636 (__v16qi) __O,
1637 __M);
1638}
1639
1640extern __inline __m128i
1641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1642_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1643{
1644 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1645 (__v16qi)
1646 _mm_setzero_si128 (),
1647 __M);
1648}
1649
1650extern __inline __m128i
1651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652_mm256_cvtusepi32_epi8 (__m256i __A)
1653{
1654 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
a25a7887
JJ
1655 (__v16qi)
1656 _mm_undefined_si128 (),
936c0fe4
AI
1657 (__mmask8) -1);
1658}
1659
1660extern __inline void
1661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1662_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1663{
1664 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1665}
1666
1667extern __inline __m128i
1668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1669_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1670{
1671 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1672 (__v16qi) __O,
1673 __M);
1674}
1675
1676extern __inline __m128i
1677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1678_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1679{
1680 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1681 (__v16qi)
1682 _mm_setzero_si128 (),
1683 __M);
1684}
1685
1686extern __inline __m128i
1687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1688_mm_cvtepi32_epi16 (__m128i __A)
1689{
1690 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
a25a7887
JJ
1691 (__v8hi)
1692 _mm_setzero_si128 (),
936c0fe4
AI
1693 (__mmask8) -1);
1694}
1695
1696extern __inline void
1697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1698_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1699{
1700 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1701}
1702
1703extern __inline __m128i
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1706{
1707 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1708 (__v8hi) __O, __M);
1709}
1710
1711extern __inline __m128i
1712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1713_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1714{
1715 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1716 (__v8hi)
1717 _mm_setzero_si128 (),
1718 __M);
1719}
1720
1721extern __inline __m128i
1722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1723_mm256_cvtepi32_epi16 (__m256i __A)
1724{
1725 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
a25a7887
JJ
1726 (__v8hi)
1727 _mm_setzero_si128 (),
936c0fe4
AI
1728 (__mmask8) -1);
1729}
1730
9ab4c07a 1731extern __inline void
936c0fe4
AI
1732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1733_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1734{
1735 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1736}
1737
1738extern __inline __m128i
1739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1741{
1742 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1743 (__v8hi) __O, __M);
1744}
1745
1746extern __inline __m128i
1747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1748_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1749{
1750 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1751 (__v8hi)
1752 _mm_setzero_si128 (),
1753 __M);
1754}
1755
1756extern __inline __m128i
1757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1758_mm_cvtsepi32_epi16 (__m128i __A)
1759{
1760 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
a25a7887
JJ
1761 (__v8hi)
1762 _mm_setzero_si128 (),
936c0fe4
AI
1763 (__mmask8) -1);
1764}
1765
1766extern __inline void
1767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1768_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1769{
1770 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1771}
1772
1773extern __inline __m128i
1774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1775_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1776{
1777 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1778 (__v8hi)__O,
1779 __M);
1780}
1781
1782extern __inline __m128i
1783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1784_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1785{
1786 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1787 (__v8hi)
1788 _mm_setzero_si128 (),
1789 __M);
1790}
1791
1792extern __inline __m128i
1793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1794_mm256_cvtsepi32_epi16 (__m256i __A)
1795{
1796 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
a25a7887
JJ
1797 (__v8hi)
1798 _mm_undefined_si128 (),
936c0fe4
AI
1799 (__mmask8) -1);
1800}
1801
1802extern __inline void
1803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1804_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1805{
1806 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1807}
1808
1809extern __inline __m128i
1810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1812{
1813 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1814 (__v8hi) __O, __M);
1815}
1816
1817extern __inline __m128i
1818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1819_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1820{
1821 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1822 (__v8hi)
1823 _mm_setzero_si128 (),
1824 __M);
1825}
1826
1827extern __inline __m128i
1828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1829_mm_cvtusepi32_epi16 (__m128i __A)
1830{
1831 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
a25a7887
JJ
1832 (__v8hi)
1833 _mm_undefined_si128 (),
936c0fe4
AI
1834 (__mmask8) -1);
1835}
1836
9ab4c07a 1837extern __inline void
936c0fe4
AI
1838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1839_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1840{
1841 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1842}
1843
1844extern __inline __m128i
1845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1847{
1848 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1849 (__v8hi) __O, __M);
1850}
1851
1852extern __inline __m128i
1853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1855{
1856 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1857 (__v8hi)
1858 _mm_setzero_si128 (),
1859 __M);
1860}
1861
1862extern __inline __m128i
1863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1864_mm256_cvtusepi32_epi16 (__m256i __A)
1865{
1866 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
a25a7887
JJ
1867 (__v8hi)
1868 _mm_undefined_si128 (),
936c0fe4
AI
1869 (__mmask8) -1);
1870}
1871
1872extern __inline void
1873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1874_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1875{
1876 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1877}
1878
1879extern __inline __m128i
1880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1881_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1882{
1883 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1884 (__v8hi) __O, __M);
1885}
1886
1887extern __inline __m128i
1888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1889_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1890{
1891 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1892 (__v8hi)
1893 _mm_setzero_si128 (),
1894 __M);
1895}
1896
1897extern __inline __m128i
1898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1899_mm_cvtepi64_epi8 (__m128i __A)
1900{
1901 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
a25a7887
JJ
1902 (__v16qi)
1903 _mm_undefined_si128 (),
936c0fe4
AI
1904 (__mmask8) -1);
1905}
1906
1907extern __inline void
1908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1909_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1910{
1911 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1912}
1913
1914extern __inline __m128i
1915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1916_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1917{
1918 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1919 (__v16qi) __O, __M);
1920}
1921
1922extern __inline __m128i
1923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1924_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1925{
1926 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1927 (__v16qi)
1928 _mm_setzero_si128 (),
1929 __M);
1930}
1931
1932extern __inline __m128i
1933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934_mm256_cvtepi64_epi8 (__m256i __A)
1935{
1936 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
a25a7887
JJ
1937 (__v16qi)
1938 _mm_undefined_si128 (),
936c0fe4
AI
1939 (__mmask8) -1);
1940}
1941
1942extern __inline void
1943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1944_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1945{
1946 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1947}
1948
1949extern __inline __m128i
1950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1952{
1953 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1954 (__v16qi) __O, __M);
1955}
1956
1957extern __inline __m128i
1958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1959_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1960{
1961 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1962 (__v16qi)
1963 _mm_setzero_si128 (),
1964 __M);
1965}
1966
1967extern __inline __m128i
1968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1969_mm_cvtsepi64_epi8 (__m128i __A)
1970{
1971 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
a25a7887
JJ
1972 (__v16qi)
1973 _mm_undefined_si128 (),
936c0fe4
AI
1974 (__mmask8) -1);
1975}
1976
1977extern __inline void
1978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1979_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1980{
1981 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1982}
1983
1984extern __inline __m128i
1985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1987{
1988 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1989 (__v16qi) __O, __M);
1990}
1991
1992extern __inline __m128i
1993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1995{
1996 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1997 (__v16qi)
1998 _mm_setzero_si128 (),
1999 __M);
2000}
2001
2002extern __inline __m128i
2003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2004_mm256_cvtsepi64_epi8 (__m256i __A)
2005{
2006 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
a25a7887
JJ
2007 (__v16qi)
2008 _mm_undefined_si128 (),
936c0fe4
AI
2009 (__mmask8) -1);
2010}
2011
2012extern __inline void
2013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2014_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2015{
2016 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2017}
2018
2019extern __inline __m128i
2020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2022{
2023 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2024 (__v16qi) __O, __M);
2025}
2026
2027extern __inline __m128i
2028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2029_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2030{
2031 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2032 (__v16qi)
2033 _mm_setzero_si128 (),
2034 __M);
2035}
2036
2037extern __inline __m128i
2038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2039_mm_cvtusepi64_epi8 (__m128i __A)
2040{
2041 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
a25a7887
JJ
2042 (__v16qi)
2043 _mm_undefined_si128 (),
936c0fe4
AI
2044 (__mmask8) -1);
2045}
2046
2047extern __inline void
2048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2049_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2050{
2051 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2052}
2053
2054extern __inline __m128i
2055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2056_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2057{
2058 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2059 (__v16qi) __O,
2060 __M);
2061}
2062
2063extern __inline __m128i
2064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2065_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2066{
2067 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2068 (__v16qi)
2069 _mm_setzero_si128 (),
2070 __M);
2071}
2072
2073extern __inline __m128i
2074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2075_mm256_cvtusepi64_epi8 (__m256i __A)
2076{
2077 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
a25a7887
JJ
2078 (__v16qi)
2079 _mm_undefined_si128 (),
936c0fe4
AI
2080 (__mmask8) -1);
2081}
2082
2083extern __inline void
2084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2085_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2086{
2087 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2088}
2089
2090extern __inline __m128i
2091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2092_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2093{
2094 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2095 (__v16qi) __O,
2096 __M);
2097}
2098
2099extern __inline __m128i
2100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2101_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2102{
2103 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2104 (__v16qi)
2105 _mm_setzero_si128 (),
2106 __M);
2107}
2108
2109extern __inline __m128i
2110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2111_mm_cvtepi64_epi16 (__m128i __A)
2112{
2113 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
a25a7887
JJ
2114 (__v8hi)
2115 _mm_undefined_si128 (),
936c0fe4
AI
2116 (__mmask8) -1);
2117}
2118
2119extern __inline void
2120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2121_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2122{
2123 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2124}
2125
2126extern __inline __m128i
2127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2128_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2129{
2130 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2131 (__v8hi)__O,
2132 __M);
2133}
2134
2135extern __inline __m128i
2136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2137_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2138{
2139 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2140 (__v8hi)
2141 _mm_setzero_si128 (),
2142 __M);
2143}
2144
2145extern __inline __m128i
2146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2147_mm256_cvtepi64_epi16 (__m256i __A)
2148{
2149 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
a25a7887
JJ
2150 (__v8hi)
2151 _mm_undefined_si128 (),
936c0fe4
AI
2152 (__mmask8) -1);
2153}
2154
2155extern __inline void
2156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2158{
2159 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2160}
2161
2162extern __inline __m128i
2163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2164_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2165{
2166 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2167 (__v8hi) __O, __M);
2168}
2169
2170extern __inline __m128i
2171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2172_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2173{
2174 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2175 (__v8hi)
2176 _mm_setzero_si128 (),
2177 __M);
2178}
2179
2180extern __inline __m128i
2181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2182_mm_cvtsepi64_epi16 (__m128i __A)
2183{
2184 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
a25a7887
JJ
2185 (__v8hi)
2186 _mm_undefined_si128 (),
936c0fe4
AI
2187 (__mmask8) -1);
2188}
2189
2190extern __inline void
2191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2192_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2193{
2194 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2195}
2196
2197extern __inline __m128i
2198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2199_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2200{
2201 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2202 (__v8hi) __O, __M);
2203}
2204
2205extern __inline __m128i
2206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2207_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2208{
2209 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2210 (__v8hi)
2211 _mm_setzero_si128 (),
2212 __M);
2213}
2214
2215extern __inline __m128i
2216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2217_mm256_cvtsepi64_epi16 (__m256i __A)
2218{
2219 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
a25a7887
JJ
2220 (__v8hi)
2221 _mm_undefined_si128 (),
936c0fe4
AI
2222 (__mmask8) -1);
2223}
2224
2225extern __inline void
2226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2227_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2228{
2229 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2230}
2231
2232extern __inline __m128i
2233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2234_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2235{
2236 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2237 (__v8hi) __O, __M);
2238}
2239
2240extern __inline __m128i
2241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2242_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2243{
2244 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2245 (__v8hi)
2246 _mm_setzero_si128 (),
2247 __M);
2248}
2249
2250extern __inline __m128i
2251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2252_mm_cvtusepi64_epi16 (__m128i __A)
2253{
2254 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
a25a7887
JJ
2255 (__v8hi)
2256 _mm_undefined_si128 (),
936c0fe4
AI
2257 (__mmask8) -1);
2258}
2259
2260extern __inline void
2261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2263{
2264 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2265}
2266
2267extern __inline __m128i
2268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2270{
2271 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2272 (__v8hi) __O, __M);
2273}
2274
2275extern __inline __m128i
2276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2277_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2278{
2279 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2280 (__v8hi)
2281 _mm_setzero_si128 (),
2282 __M);
2283}
2284
2285extern __inline __m128i
2286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287_mm256_cvtusepi64_epi16 (__m256i __A)
2288{
2289 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
a25a7887
JJ
2290 (__v8hi)
2291 _mm_undefined_si128 (),
936c0fe4
AI
2292 (__mmask8) -1);
2293}
2294
2295extern __inline void
2296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2297_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2298{
2299 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2300}
2301
2302extern __inline __m128i
2303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2304_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2305{
2306 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2307 (__v8hi) __O, __M);
2308}
2309
2310extern __inline __m128i
2311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2312_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2313{
2314 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2315 (__v8hi)
2316 _mm_setzero_si128 (),
2317 __M);
2318}
2319
2320extern __inline __m128i
2321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2322_mm_cvtepi64_epi32 (__m128i __A)
2323{
2324 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
a25a7887
JJ
2325 (__v4si)
2326 _mm_undefined_si128 (),
936c0fe4
AI
2327 (__mmask8) -1);
2328}
2329
2330extern __inline void
2331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2332_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2333{
2334 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2335}
2336
2337extern __inline __m128i
2338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2339_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2340{
2341 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2342 (__v4si) __O, __M);
2343}
2344
2345extern __inline __m128i
2346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2347_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2348{
2349 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2350 (__v4si)
2351 _mm_setzero_si128 (),
2352 __M);
2353}
2354
2355extern __inline __m128i
2356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2357_mm256_cvtepi64_epi32 (__m256i __A)
2358{
2359 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
a25a7887
JJ
2360 (__v4si)
2361 _mm_undefined_si128 (),
936c0fe4
AI
2362 (__mmask8) -1);
2363}
2364
2365extern __inline void
2366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2367_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2368{
2369 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2370}
2371
2372extern __inline __m128i
2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2375{
2376 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2377 (__v4si) __O, __M);
2378}
2379
2380extern __inline __m128i
2381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2382_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2383{
2384 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2385 (__v4si)
2386 _mm_setzero_si128 (),
2387 __M);
2388}
2389
2390extern __inline __m128i
2391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392_mm_cvtsepi64_epi32 (__m128i __A)
2393{
2394 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
a25a7887
JJ
2395 (__v4si)
2396 _mm_undefined_si128 (),
936c0fe4
AI
2397 (__mmask8) -1);
2398}
2399
9ab4c07a 2400extern __inline void
936c0fe4
AI
2401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2402_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2403{
2404 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2405}
2406
2407extern __inline __m128i
2408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2409_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2410{
2411 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2412 (__v4si) __O, __M);
2413}
2414
2415extern __inline __m128i
2416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2417_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2418{
2419 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2420 (__v4si)
2421 _mm_setzero_si128 (),
2422 __M);
2423}
2424
2425extern __inline __m128i
2426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2427_mm256_cvtsepi64_epi32 (__m256i __A)
2428{
2429 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
a25a7887
JJ
2430 (__v4si)
2431 _mm_undefined_si128 (),
936c0fe4
AI
2432 (__mmask8) -1);
2433}
2434
2435extern __inline void
2436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2437_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2438{
2439 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2440}
2441
2442extern __inline __m128i
2443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2444_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2445{
2446 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2447 (__v4si)__O,
2448 __M);
2449}
2450
2451extern __inline __m128i
2452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2453_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2454{
2455 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2456 (__v4si)
2457 _mm_setzero_si128 (),
2458 __M);
2459}
2460
2461extern __inline __m128i
2462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2463_mm_cvtusepi64_epi32 (__m128i __A)
2464{
2465 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
a25a7887
JJ
2466 (__v4si)
2467 _mm_undefined_si128 (),
936c0fe4
AI
2468 (__mmask8) -1);
2469}
2470
2471extern __inline void
2472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2473_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2474{
2475 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2476}
2477
2478extern __inline __m128i
2479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2480_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2481{
2482 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2483 (__v4si) __O, __M);
2484}
2485
2486extern __inline __m128i
2487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2488_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2489{
2490 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2491 (__v4si)
2492 _mm_setzero_si128 (),
2493 __M);
2494}
2495
2496extern __inline __m128i
2497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2498_mm256_cvtusepi64_epi32 (__m256i __A)
2499{
2500 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
a25a7887
JJ
2501 (__v4si)
2502 _mm_undefined_si128 (),
936c0fe4
AI
2503 (__mmask8) -1);
2504}
2505
2506extern __inline void
2507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2508_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2509{
2510 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2511}
2512
2513extern __inline __m128i
2514__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2515_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2516{
2517 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2518 (__v4si) __O, __M);
2519}
2520
2521extern __inline __m128i
2522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2523_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2524{
2525 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2526 (__v4si)
2527 _mm_setzero_si128 (),
2528 __M);
2529}
2530
2531extern __inline __m256
2532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2534{
2535 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2536 (__v8sf) __O,
2537 __M);
2538}
2539
2540extern __inline __m256
2541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2543{
2544 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2545 (__v8sf)
2546 _mm256_setzero_ps (),
2547 __M);
2548}
2549
2550extern __inline __m128
2551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2553{
2554 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2555 (__v4sf) __O,
2556 __M);
2557}
2558
2559extern __inline __m128
2560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2561_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2562{
2563 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2564 (__v4sf)
2565 _mm_setzero_ps (),
2566 __M);
2567}
2568
2569extern __inline __m256d
2570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2572{
2573 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2574 (__v4df) __O,
2575 __M);
2576}
2577
2578extern __inline __m256d
2579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2580_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2581{
2582 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2583 (__v4df)
2584 _mm256_setzero_pd (),
2585 __M);
2586}
2587
2588extern __inline __m256i
2589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2590_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2591{
2592 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2593 (__v8si) __O,
2594 __M);
2595}
2596
2597extern __inline __m256i
2598__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2599_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2600{
2601 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2602 (__v8si)
2603 _mm256_setzero_si256 (),
2604 __M);
2605}
2606
2607extern __inline __m256i
2608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2609_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2610{
2611 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2612 __M);
2613}
2614
2615extern __inline __m256i
2616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2617_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2618{
2619 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2620 (__v8si)
2621 _mm256_setzero_si256 (),
2622 __M);
2623}
2624
2625extern __inline __m128i
2626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2627_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2628{
2629 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2630 (__v4si) __O,
2631 __M);
2632}
2633
2634extern __inline __m128i
2635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2636_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2637{
2638 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2639 (__v4si)
2640 _mm_setzero_si128 (),
2641 __M);
2642}
2643
2644extern __inline __m128i
2645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2646_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2647{
2648 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2649 __M);
2650}
2651
2652extern __inline __m128i
2653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2654_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2655{
a25a7887
JJ
2656 return (__m128i)
2657 __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2658 (__v4si) _mm_setzero_si128 (),
2659 __M);
936c0fe4
AI
2660}
2661
2662extern __inline __m256i
2663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2664_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2665{
2666 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2667 (__v4di) __O,
2668 __M);
2669}
2670
2671extern __inline __m256i
2672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2674{
2675 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2676 (__v4di)
2677 _mm256_setzero_si256 (),
2678 __M);
2679}
2680
2681extern __inline __m256i
2682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2683_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2684{
936c0fe4
AI
2685 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2686 __M);
936c0fe4
AI
2687}
2688
2689extern __inline __m256i
2690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2691_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2692{
936c0fe4
AI
2693 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2694 (__v4di)
2695 _mm256_setzero_si256 (),
2696 __M);
936c0fe4
AI
2697}
2698
2699extern __inline __m128i
2700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2702{
2703 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2704 (__v2di) __O,
2705 __M);
2706}
2707
2708extern __inline __m128i
2709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2711{
2712 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2713 (__v2di)
2714 _mm_setzero_si128 (),
2715 __M);
2716}
2717
2718extern __inline __m128i
2719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2720_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2721{
936c0fe4
AI
2722 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2723 __M);
936c0fe4
AI
2724}
2725
2726extern __inline __m128i
2727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2728_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2729{
a25a7887
JJ
2730 return (__m128i)
2731 __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2732 (__v2di) _mm_setzero_si128 (),
2733 __M);
936c0fe4
AI
2734}
2735
2736extern __inline __m256
2737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2738_mm256_broadcast_f32x4 (__m128 __A)
2739{
2740 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2741 (__v8sf)_mm256_undefined_pd (),
c42b0bdf 2742 (__mmask8) -1);
936c0fe4
AI
2743}
2744
2745extern __inline __m256
2746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2748{
2749 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2750 (__v8sf) __O,
2751 __M);
2752}
2753
2754extern __inline __m256
2755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2756_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2757{
2758 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2759 (__v8sf)
2760 _mm256_setzero_ps (),
2761 __M);
2762}
2763
2764extern __inline __m256i
2765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2766_mm256_broadcast_i32x4 (__m128i __A)
2767{
2768 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2769 __A,
2770 (__v8si)_mm256_undefined_si256 (),
c42b0bdf 2771 (__mmask8) -1);
936c0fe4
AI
2772}
2773
2774extern __inline __m256i
2775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2776_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2777{
2778 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2779 __A,
2780 (__v8si)
2781 __O, __M);
2782}
2783
2784extern __inline __m256i
2785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2787{
2788 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2789 __A,
2790 (__v8si)
2791 _mm256_setzero_si256 (),
2792 __M);
2793}
2794
2795extern __inline __m256i
2796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2798{
2799 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2800 (__v8si) __W,
2801 (__mmask8) __U);
2802}
2803
2804extern __inline __m256i
2805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2806_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2807{
2808 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2809 (__v8si)
2810 _mm256_setzero_si256 (),
2811 (__mmask8) __U);
2812}
2813
2814extern __inline __m128i
2815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2817{
2818 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2819 (__v4si) __W,
2820 (__mmask8) __U);
2821}
2822
2823extern __inline __m128i
2824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2825_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2826{
2827 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2828 (__v4si)
2829 _mm_setzero_si128 (),
2830 (__mmask8) __U);
2831}
2832
2833extern __inline __m256i
2834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2836{
2837 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2838 (__v4di) __W,
2839 (__mmask8) __U);
2840}
2841
2842extern __inline __m256i
2843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2844_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2845{
2846 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2847 (__v4di)
2848 _mm256_setzero_si256 (),
2849 (__mmask8) __U);
2850}
2851
2852extern __inline __m128i
2853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2855{
2856 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2857 (__v2di) __W,
2858 (__mmask8) __U);
2859}
2860
2861extern __inline __m128i
2862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2863_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2864{
2865 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2866 (__v2di)
2867 _mm_setzero_si128 (),
2868 (__mmask8) __U);
2869}
2870
2871extern __inline __m256i
2872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2873_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2874{
2875 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2876 (__v8si) __W,
2877 (__mmask8) __U);
2878}
2879
2880extern __inline __m256i
2881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2882_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2883{
2884 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2885 (__v8si)
2886 _mm256_setzero_si256 (),
2887 (__mmask8) __U);
2888}
2889
2890extern __inline __m128i
2891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2892_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2893{
2894 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2895 (__v4si) __W,
2896 (__mmask8) __U);
2897}
2898
2899extern __inline __m128i
2900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2901_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2902{
2903 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2904 (__v4si)
2905 _mm_setzero_si128 (),
2906 (__mmask8) __U);
2907}
2908
2909extern __inline __m256i
2910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2911_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2912{
2913 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2914 (__v4di) __W,
2915 (__mmask8) __U);
2916}
2917
2918extern __inline __m256i
2919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2920_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2921{
2922 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2923 (__v4di)
2924 _mm256_setzero_si256 (),
2925 (__mmask8) __U);
2926}
2927
2928extern __inline __m128i
2929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2931{
2932 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2933 (__v2di) __W,
2934 (__mmask8) __U);
2935}
2936
2937extern __inline __m128i
2938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2939_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2940{
2941 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2942 (__v2di)
2943 _mm_setzero_si128 (),
2944 (__mmask8) __U);
2945}
2946
2947extern __inline __m256i
2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2950{
2951 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2952 (__v4di) __W,
2953 (__mmask8) __U);
2954}
2955
2956extern __inline __m256i
2957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2958_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2959{
2960 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2961 (__v4di)
2962 _mm256_setzero_si256 (),
2963 (__mmask8) __U);
2964}
2965
2966extern __inline __m128i
2967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2968_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2969{
2970 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2971 (__v2di) __W,
2972 (__mmask8) __U);
2973}
2974
2975extern __inline __m128i
2976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2977_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2978{
2979 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2980 (__v2di)
2981 _mm_setzero_si128 (),
2982 (__mmask8) __U);
2983}
2984
2985extern __inline __m256i
2986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2987_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2988{
2989 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2990 (__v8si) __W,
2991 (__mmask8) __U);
2992}
2993
2994extern __inline __m256i
2995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2996_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2997{
2998 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2999 (__v8si)
3000 _mm256_setzero_si256 (),
3001 (__mmask8) __U);
3002}
3003
3004extern __inline __m128i
3005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3007{
3008 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
3009 (__v4si) __W,
3010 (__mmask8) __U);
3011}
3012
3013extern __inline __m128i
3014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3015_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
3016{
3017 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
3018 (__v4si)
3019 _mm_setzero_si128 (),
3020 (__mmask8) __U);
3021}
3022
3023extern __inline __m256i
3024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3025_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3026{
3027 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3028 (__v4di) __W,
3029 (__mmask8) __U);
3030}
3031
3032extern __inline __m256i
3033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3034_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3035{
3036 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3037 (__v4di)
3038 _mm256_setzero_si256 (),
3039 (__mmask8) __U);
3040}
3041
3042extern __inline __m128i
3043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3045{
3046 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3047 (__v2di) __W,
3048 (__mmask8) __U);
3049}
3050
3051extern __inline __m128i
3052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3053_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3054{
3055 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3056 (__v2di)
3057 _mm_setzero_si128 (),
3058 (__mmask8) __U);
3059}
3060
3061extern __inline __m256i
3062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3063_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3064{
3065 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3066 (__v8si) __W,
3067 (__mmask8) __U);
3068}
3069
3070extern __inline __m256i
3071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3072_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3073{
3074 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3075 (__v8si)
3076 _mm256_setzero_si256 (),
3077 (__mmask8) __U);
3078}
3079
3080extern __inline __m128i
3081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3082_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3083{
3084 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3085 (__v4si) __W,
3086 (__mmask8) __U);
3087}
3088
3089extern __inline __m128i
3090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3091_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3092{
3093 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3094 (__v4si)
3095 _mm_setzero_si128 (),
3096 (__mmask8) __U);
3097}
3098
3099extern __inline __m256i
3100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3101_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3102{
3103 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3104 (__v4di) __W,
3105 (__mmask8) __U);
3106}
3107
3108extern __inline __m256i
3109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3110_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3111{
3112 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3113 (__v4di)
3114 _mm256_setzero_si256 (),
3115 (__mmask8) __U);
3116}
3117
3118extern __inline __m128i
3119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3120_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3121{
3122 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3123 (__v2di) __W,
3124 (__mmask8) __U);
3125}
3126
3127extern __inline __m128i
3128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3129_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3130{
3131 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3132 (__v2di)
3133 _mm_setzero_si128 (),
3134 (__mmask8) __U);
3135}
3136
3137extern __inline __m256i
3138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3139_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3140{
3141 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3142 (__v4di) __W,
3143 (__mmask8) __U);
3144}
3145
3146extern __inline __m256i
3147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3149{
3150 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3151 (__v4di)
3152 _mm256_setzero_si256 (),
3153 (__mmask8) __U);
3154}
3155
3156extern __inline __m128i
3157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3158_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3159{
3160 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3161 (__v2di) __W,
3162 (__mmask8) __U);
3163}
3164
3165extern __inline __m128i
3166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3167_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3168{
3169 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3170 (__v2di)
3171 _mm_setzero_si128 (),
3172 (__mmask8) __U);
3173}
3174
3175extern __inline __m256d
3176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177_mm256_rcp14_pd (__m256d __A)
3178{
3179 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3180 (__v4df)
3181 _mm256_setzero_pd (),
3182 (__mmask8) -1);
3183}
3184
3185extern __inline __m256d
3186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3187_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3188{
3189 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3190 (__v4df) __W,
3191 (__mmask8) __U);
3192}
3193
3194extern __inline __m256d
3195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3197{
3198 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3199 (__v4df)
3200 _mm256_setzero_pd (),
3201 (__mmask8) __U);
3202}
3203
3204extern __inline __m128d
3205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3206_mm_rcp14_pd (__m128d __A)
3207{
3208 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3209 (__v2df)
3210 _mm_setzero_pd (),
3211 (__mmask8) -1);
3212}
3213
3214extern __inline __m128d
3215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3216_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3217{
3218 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3219 (__v2df) __W,
3220 (__mmask8) __U);
3221}
3222
3223extern __inline __m128d
3224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3225_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3226{
3227 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3228 (__v2df)
3229 _mm_setzero_pd (),
3230 (__mmask8) __U);
3231}
3232
3233extern __inline __m256
3234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235_mm256_rcp14_ps (__m256 __A)
3236{
3237 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3238 (__v8sf)
3239 _mm256_setzero_ps (),
3240 (__mmask8) -1);
3241}
3242
3243extern __inline __m256
3244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3246{
3247 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3248 (__v8sf) __W,
3249 (__mmask8) __U);
3250}
3251
3252extern __inline __m256
3253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3254_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3255{
3256 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3257 (__v8sf)
3258 _mm256_setzero_ps (),
3259 (__mmask8) __U);
3260}
3261
3262extern __inline __m128
3263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3264_mm_rcp14_ps (__m128 __A)
3265{
3266 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3267 (__v4sf)
3268 _mm_setzero_ps (),
3269 (__mmask8) -1);
3270}
3271
3272extern __inline __m128
3273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3274_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3275{
3276 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3277 (__v4sf) __W,
3278 (__mmask8) __U);
3279}
3280
3281extern __inline __m128
3282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3283_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3284{
3285 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3286 (__v4sf)
3287 _mm_setzero_ps (),
3288 (__mmask8) __U);
3289}
3290
3291extern __inline __m256d
3292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3293_mm256_rsqrt14_pd (__m256d __A)
3294{
3295 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3296 (__v4df)
3297 _mm256_setzero_pd (),
3298 (__mmask8) -1);
3299}
3300
3301extern __inline __m256d
3302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3303_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3304{
3305 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3306 (__v4df) __W,
3307 (__mmask8) __U);
3308}
3309
3310extern __inline __m256d
3311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3312_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3313{
3314 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3315 (__v4df)
3316 _mm256_setzero_pd (),
3317 (__mmask8) __U);
3318}
3319
3320extern __inline __m128d
3321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3322_mm_rsqrt14_pd (__m128d __A)
3323{
3324 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3325 (__v2df)
3326 _mm_setzero_pd (),
3327 (__mmask8) -1);
3328}
3329
3330extern __inline __m128d
3331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3332_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3333{
3334 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3335 (__v2df) __W,
3336 (__mmask8) __U);
3337}
3338
3339extern __inline __m128d
3340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3341_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3342{
3343 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3344 (__v2df)
3345 _mm_setzero_pd (),
3346 (__mmask8) __U);
3347}
3348
3349extern __inline __m256
3350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3351_mm256_rsqrt14_ps (__m256 __A)
3352{
3353 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3354 (__v8sf)
3355 _mm256_setzero_ps (),
3356 (__mmask8) -1);
3357}
3358
3359extern __inline __m256
3360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3361_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3362{
3363 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3364 (__v8sf) __W,
3365 (__mmask8) __U);
3366}
3367
3368extern __inline __m256
3369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3370_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3371{
3372 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3373 (__v8sf)
3374 _mm256_setzero_ps (),
3375 (__mmask8) __U);
3376}
3377
3378extern __inline __m128
3379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3380_mm_rsqrt14_ps (__m128 __A)
3381{
3382 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3383 (__v4sf)
3384 _mm_setzero_ps (),
3385 (__mmask8) -1);
3386}
3387
3388extern __inline __m128
3389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3390_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3391{
3392 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3393 (__v4sf) __W,
3394 (__mmask8) __U);
3395}
3396
3397extern __inline __m128
3398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3399_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3400{
3401 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3402 (__v4sf)
3403 _mm_setzero_ps (),
3404 (__mmask8) __U);
3405}
3406
3407extern __inline __m256d
3408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3410{
3411 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3412 (__v4df) __W,
3413 (__mmask8) __U);
3414}
3415
3416extern __inline __m256d
3417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3418_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3419{
3420 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3421 (__v4df)
3422 _mm256_setzero_pd (),
3423 (__mmask8) __U);
3424}
3425
3426extern __inline __m128d
3427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3429{
3430 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3431 (__v2df) __W,
3432 (__mmask8) __U);
3433}
3434
3435extern __inline __m128d
3436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3437_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3438{
3439 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3440 (__v2df)
3441 _mm_setzero_pd (),
3442 (__mmask8) __U);
3443}
3444
3445extern __inline __m256
3446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3448{
3449 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3450 (__v8sf) __W,
3451 (__mmask8) __U);
3452}
3453
3454extern __inline __m256
3455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3456_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3457{
3458 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3459 (__v8sf)
3460 _mm256_setzero_ps (),
3461 (__mmask8) __U);
3462}
3463
3464extern __inline __m128
3465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3466_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3467{
3468 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3469 (__v4sf) __W,
3470 (__mmask8) __U);
3471}
3472
3473extern __inline __m128
3474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3475_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3476{
3477 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3478 (__v4sf)
3479 _mm_setzero_ps (),
3480 (__mmask8) __U);
3481}
3482
3483extern __inline __m256i
3484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3485_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3486 __m256i __B)
3487{
3488 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3489 (__v8si) __B,
3490 (__v8si) __W,
3491 (__mmask8) __U);
3492}
3493
3494extern __inline __m256i
3495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3496_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3497{
3498 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3499 (__v8si) __B,
3500 (__v8si)
3501 _mm256_setzero_si256 (),
3502 (__mmask8) __U);
3503}
3504
3505extern __inline __m256i
3506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3507_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3508 __m256i __B)
3509{
3510 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3511 (__v4di) __B,
3512 (__v4di) __W,
3513 (__mmask8) __U);
3514}
3515
3516extern __inline __m256i
3517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3518_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3519{
3520 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3521 (__v4di) __B,
3522 (__v4di)
3523 _mm256_setzero_si256 (),
3524 (__mmask8) __U);
3525}
3526
3527extern __inline __m256i
3528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3529_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3530 __m256i __B)
3531{
3532 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3533 (__v8si) __B,
3534 (__v8si) __W,
3535 (__mmask8) __U);
3536}
3537
3538extern __inline __m256i
3539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3540_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3541{
3542 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3543 (__v8si) __B,
3544 (__v8si)
3545 _mm256_setzero_si256 (),
3546 (__mmask8) __U);
3547}
3548
3549extern __inline __m256i
3550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3551_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3552 __m256i __B)
3553{
3554 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3555 (__v4di) __B,
3556 (__v4di) __W,
3557 (__mmask8) __U);
3558}
3559
3560extern __inline __m256i
3561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3562_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3563{
3564 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3565 (__v4di) __B,
3566 (__v4di)
3567 _mm256_setzero_si256 (),
3568 (__mmask8) __U);
3569}
3570
3571extern __inline __m128i
3572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3573_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3574 __m128i __B)
3575{
3576 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3577 (__v4si) __B,
3578 (__v4si) __W,
3579 (__mmask8) __U);
3580}
3581
3582extern __inline __m128i
3583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3584_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3585{
3586 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3587 (__v4si) __B,
3588 (__v4si)
3589 _mm_setzero_si128 (),
3590 (__mmask8) __U);
3591}
3592
3593extern __inline __m128i
3594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3595_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3596 __m128i __B)
3597{
3598 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3599 (__v2di) __B,
3600 (__v2di) __W,
3601 (__mmask8) __U);
3602}
3603
3604extern __inline __m128i
3605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3606_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3607{
3608 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3609 (__v2di) __B,
3610 (__v2di)
3611 _mm_setzero_si128 (),
3612 (__mmask8) __U);
3613}
3614
3615extern __inline __m128i
3616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3617_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3618 __m128i __B)
3619{
3620 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3621 (__v4si) __B,
3622 (__v4si) __W,
3623 (__mmask8) __U);
3624}
3625
3626extern __inline __m128i
3627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3628_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3629{
3630 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3631 (__v4si) __B,
3632 (__v4si)
3633 _mm_setzero_si128 (),
3634 (__mmask8) __U);
3635}
3636
3637extern __inline __m128i
3638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3639_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3640 __m128i __B)
3641{
3642 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3643 (__v2di) __B,
3644 (__v2di) __W,
3645 (__mmask8) __U);
3646}
3647
3648extern __inline __m128i
3649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3650_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3651{
3652 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3653 (__v2di) __B,
3654 (__v2di)
3655 _mm_setzero_si128 (),
3656 (__mmask8) __U);
3657}
3658
3659extern __inline __m256
3660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661_mm256_getexp_ps (__m256 __A)
3662{
3663 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3664 (__v8sf)
3665 _mm256_setzero_ps (),
3666 (__mmask8) -1);
3667}
3668
3669extern __inline __m256
3670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3672{
3673 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3674 (__v8sf) __W,
3675 (__mmask8) __U);
3676}
3677
3678extern __inline __m256
3679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3681{
3682 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3683 (__v8sf)
3684 _mm256_setzero_ps (),
3685 (__mmask8) __U);
3686}
3687
3688extern __inline __m256d
3689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690_mm256_getexp_pd (__m256d __A)
3691{
3692 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3693 (__v4df)
3694 _mm256_setzero_pd (),
3695 (__mmask8) -1);
3696}
3697
3698extern __inline __m256d
3699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3701{
3702 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3703 (__v4df) __W,
3704 (__mmask8) __U);
3705}
3706
3707extern __inline __m256d
3708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3710{
3711 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3712 (__v4df)
3713 _mm256_setzero_pd (),
3714 (__mmask8) __U);
3715}
3716
3717extern __inline __m128
3718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719_mm_getexp_ps (__m128 __A)
3720{
3721 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3722 (__v4sf)
3723 _mm_setzero_ps (),
3724 (__mmask8) -1);
3725}
3726
3727extern __inline __m128
3728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3730{
3731 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3732 (__v4sf) __W,
3733 (__mmask8) __U);
3734}
3735
3736extern __inline __m128
3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3739{
3740 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3741 (__v4sf)
3742 _mm_setzero_ps (),
3743 (__mmask8) __U);
3744}
3745
3746extern __inline __m128d
3747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3748_mm_getexp_pd (__m128d __A)
3749{
3750 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3751 (__v2df)
3752 _mm_setzero_pd (),
3753 (__mmask8) -1);
3754}
3755
3756extern __inline __m128d
3757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3758_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3759{
3760 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3761 (__v2df) __W,
3762 (__mmask8) __U);
3763}
3764
3765extern __inline __m128d
3766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3767_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3768{
3769 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3770 (__v2df)
3771 _mm_setzero_pd (),
3772 (__mmask8) __U);
3773}
3774
3775extern __inline __m256i
3776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3777_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3778 __m128i __B)
3779{
3780 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3781 (__v4si) __B,
3782 (__v8si) __W,
3783 (__mmask8) __U);
3784}
3785
3786extern __inline __m256i
3787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3788_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3789{
3790 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3791 (__v4si) __B,
3792 (__v8si)
3793 _mm256_setzero_si256 (),
3794 (__mmask8) __U);
3795}
3796
3797extern __inline __m128i
3798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3799_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3800 __m128i __B)
3801{
3802 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3803 (__v4si) __B,
3804 (__v4si) __W,
3805 (__mmask8) __U);
3806}
3807
3808extern __inline __m128i
3809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3810_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3811{
3812 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3813 (__v4si) __B,
3814 (__v4si)
3815 _mm_setzero_si128 (),
3816 (__mmask8) __U);
3817}
3818
3819extern __inline __m256i
3820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3821_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3822 __m128i __B)
3823{
3824 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3825 (__v2di) __B,
3826 (__v4di) __W,
3827 (__mmask8) __U);
3828}
3829
3830extern __inline __m256i
3831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3832_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3833{
3834 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3835 (__v2di) __B,
3836 (__v4di)
3837 _mm256_setzero_si256 (),
3838 (__mmask8) __U);
3839}
3840
3841extern __inline __m128i
3842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3843_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3844 __m128i __B)
3845{
3846 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3847 (__v2di) __B,
3848 (__v2di) __W,
3849 (__mmask8) __U);
3850}
3851
3852extern __inline __m128i
3853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3854_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3855{
3856 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3857 (__v2di) __B,
3858 (__v2di)
a25a7887 3859 _mm_setzero_si128 (),
936c0fe4
AI
3860 (__mmask8) __U);
3861}
3862
3863extern __inline __m256i
3864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3865_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3866 __m256i __B)
3867{
3868 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3869 (__v8si) __B,
3870 (__v8si) __W,
3871 (__mmask8) __U);
3872}
3873
3874extern __inline __m256i
3875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3876_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3877{
3878 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3879 (__v8si) __B,
3880 (__v8si)
3881 _mm256_setzero_si256 (),
3882 (__mmask8) __U);
3883}
3884
3885extern __inline __m256d
3886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3887_mm256_scalef_pd (__m256d __A, __m256d __B)
3888{
3889 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3890 (__v4df) __B,
3891 (__v4df)
3892 _mm256_setzero_pd (),
3893 (__mmask8) -1);
3894}
3895
3896extern __inline __m256d
3897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3898_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3899 __m256d __B)
3900{
3901 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3902 (__v4df) __B,
3903 (__v4df) __W,
3904 (__mmask8) __U);
3905}
3906
3907extern __inline __m256d
3908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3909_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3910{
3911 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3912 (__v4df) __B,
3913 (__v4df)
3914 _mm256_setzero_pd (),
3915 (__mmask8) __U);
3916}
3917
3918extern __inline __m256
3919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3920_mm256_scalef_ps (__m256 __A, __m256 __B)
3921{
3922 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3923 (__v8sf) __B,
3924 (__v8sf)
3925 _mm256_setzero_ps (),
3926 (__mmask8) -1);
3927}
3928
3929extern __inline __m256
3930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3931_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3932 __m256 __B)
3933{
3934 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3935 (__v8sf) __B,
3936 (__v8sf) __W,
3937 (__mmask8) __U);
3938}
3939
3940extern __inline __m256
3941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3942_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3943{
3944 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3945 (__v8sf) __B,
3946 (__v8sf)
3947 _mm256_setzero_ps (),
3948 (__mmask8) __U);
3949}
3950
3951extern __inline __m128d
3952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3953_mm_scalef_pd (__m128d __A, __m128d __B)
3954{
3955 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3956 (__v2df) __B,
3957 (__v2df)
3958 _mm_setzero_pd (),
3959 (__mmask8) -1);
3960}
3961
3962extern __inline __m128d
3963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3964_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3965 __m128d __B)
3966{
3967 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3968 (__v2df) __B,
3969 (__v2df) __W,
3970 (__mmask8) __U);
3971}
3972
3973extern __inline __m128d
3974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3975_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3976{
3977 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3978 (__v2df) __B,
3979 (__v2df)
3980 _mm_setzero_pd (),
3981 (__mmask8) __U);
3982}
3983
3984extern __inline __m128
3985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3986_mm_scalef_ps (__m128 __A, __m128 __B)
3987{
3988 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3989 (__v4sf) __B,
3990 (__v4sf)
3991 _mm_setzero_ps (),
3992 (__mmask8) -1);
3993}
3994
3995extern __inline __m128
3996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3997_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3998{
3999 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
4000 (__v4sf) __B,
4001 (__v4sf) __W,
4002 (__mmask8) __U);
4003}
4004
4005extern __inline __m128
4006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4007_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
4008{
4009 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
4010 (__v4sf) __B,
4011 (__v4sf)
4012 _mm_setzero_ps (),
4013 (__mmask8) __U);
4014}
4015
4016extern __inline __m256d
4017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4018_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4019 __m256d __C)
4020{
4021 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4022 (__v4df) __B,
4023 (__v4df) __C,
4024 (__mmask8) __U);
4025}
4026
4027extern __inline __m256d
4028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4029_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4030 __mmask8 __U)
4031{
4032 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4033 (__v4df) __B,
4034 (__v4df) __C,
4035 (__mmask8) __U);
4036}
4037
4038extern __inline __m256d
4039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4040_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4041 __m256d __C)
4042{
4043 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4044 (__v4df) __B,
4045 (__v4df) __C,
4046 (__mmask8) __U);
4047}
4048
4049extern __inline __m128d
4050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4051_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4052{
4053 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4054 (__v2df) __B,
4055 (__v2df) __C,
4056 (__mmask8) __U);
4057}
4058
4059extern __inline __m128d
4060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4061_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4062 __mmask8 __U)
4063{
4064 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4065 (__v2df) __B,
4066 (__v2df) __C,
4067 (__mmask8) __U);
4068}
4069
4070extern __inline __m128d
4071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4072_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4073 __m128d __C)
4074{
4075 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4076 (__v2df) __B,
4077 (__v2df) __C,
4078 (__mmask8) __U);
4079}
4080
4081extern __inline __m256
4082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4083_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4084{
4085 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4086 (__v8sf) __B,
4087 (__v8sf) __C,
4088 (__mmask8) __U);
4089}
4090
4091extern __inline __m256
4092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4093_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4094 __mmask8 __U)
4095{
4096 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4097 (__v8sf) __B,
4098 (__v8sf) __C,
4099 (__mmask8) __U);
4100}
4101
4102extern __inline __m256
4103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4104_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4105 __m256 __C)
4106{
4107 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4108 (__v8sf) __B,
4109 (__v8sf) __C,
4110 (__mmask8) __U);
4111}
4112
4113extern __inline __m128
4114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4115_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4116{
4117 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4118 (__v4sf) __B,
4119 (__v4sf) __C,
4120 (__mmask8) __U);
4121}
4122
4123extern __inline __m128
4124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4125_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4126{
4127 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4128 (__v4sf) __B,
4129 (__v4sf) __C,
4130 (__mmask8) __U);
4131}
4132
4133extern __inline __m128
4134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4135_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4136{
4137 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4138 (__v4sf) __B,
4139 (__v4sf) __C,
4140 (__mmask8) __U);
4141}
4142
4143extern __inline __m256d
4144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4145_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4146 __m256d __C)
4147{
fe7f972d 4148 return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
936c0fe4 4149 (__v4df) __B,
fe7f972d 4150 (__v4df) __C,
936c0fe4
AI
4151 (__mmask8) __U);
4152}
4153
4154extern __inline __m256d
4155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4156_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4157 __mmask8 __U)
4158{
4159 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4160 (__v4df) __B,
4161 (__v4df) __C,
4162 (__mmask8) __U);
4163}
4164
4165extern __inline __m256d
4166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4167_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4168 __m256d __C)
4169{
fe7f972d 4170 return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
936c0fe4 4171 (__v4df) __B,
fe7f972d 4172 (__v4df) __C,
936c0fe4
AI
4173 (__mmask8) __U);
4174}
4175
4176extern __inline __m128d
4177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4178_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4179{
fe7f972d 4180 return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
936c0fe4 4181 (__v2df) __B,
fe7f972d 4182 (__v2df) __C,
936c0fe4
AI
4183 (__mmask8) __U);
4184}
4185
4186extern __inline __m128d
4187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4188_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4189 __mmask8 __U)
4190{
4191 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4192 (__v2df) __B,
4193 (__v2df) __C,
4194 (__mmask8) __U);
4195}
4196
4197extern __inline __m128d
4198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4199_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4200 __m128d __C)
4201{
fe7f972d 4202 return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
936c0fe4 4203 (__v2df) __B,
fe7f972d 4204 (__v2df) __C,
936c0fe4
AI
4205 (__mmask8) __U);
4206}
4207
4208extern __inline __m256
4209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4210_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4211{
fe7f972d 4212 return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
936c0fe4 4213 (__v8sf) __B,
fe7f972d 4214 (__v8sf) __C,
936c0fe4
AI
4215 (__mmask8) __U);
4216}
4217
4218extern __inline __m256
4219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4220_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4221 __mmask8 __U)
4222{
4223 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4224 (__v8sf) __B,
4225 (__v8sf) __C,
4226 (__mmask8) __U);
4227}
4228
4229extern __inline __m256
4230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4231_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4232 __m256 __C)
4233{
fe7f972d 4234 return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
936c0fe4 4235 (__v8sf) __B,
fe7f972d 4236 (__v8sf) __C,
936c0fe4
AI
4237 (__mmask8) __U);
4238}
4239
4240extern __inline __m128
4241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4242_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4243{
fe7f972d 4244 return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
936c0fe4 4245 (__v4sf) __B,
fe7f972d 4246 (__v4sf) __C,
936c0fe4
AI
4247 (__mmask8) __U);
4248}
4249
4250extern __inline __m128
4251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4252_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4253{
4254 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4255 (__v4sf) __B,
4256 (__v4sf) __C,
4257 (__mmask8) __U);
4258}
4259
4260extern __inline __m128
4261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4262_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4263{
fe7f972d 4264 return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
936c0fe4 4265 (__v4sf) __B,
fe7f972d 4266 (__v4sf) __C,
936c0fe4
AI
4267 (__mmask8) __U);
4268}
4269
4270extern __inline __m256d
4271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4272_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4273 __m256d __C)
4274{
4275 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4276 (__v4df) __B,
4277 (__v4df) __C,
4278 (__mmask8) __U);
4279}
4280
4281extern __inline __m256d
4282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4283_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4284 __mmask8 __U)
4285{
4286 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4287 (__v4df) __B,
4288 (__v4df) __C,
4289 (__mmask8)
4290 __U);
4291}
4292
4293extern __inline __m256d
4294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4295_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4296 __m256d __C)
4297{
4298 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4299 (__v4df) __B,
4300 (__v4df) __C,
4301 (__mmask8)
4302 __U);
4303}
4304
4305extern __inline __m128d
4306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4307_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4308 __m128d __C)
4309{
4310 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4311 (__v2df) __B,
4312 (__v2df) __C,
4313 (__mmask8) __U);
4314}
4315
4316extern __inline __m128d
4317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4318_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4319 __mmask8 __U)
4320{
4321 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4322 (__v2df) __B,
4323 (__v2df) __C,
4324 (__mmask8)
4325 __U);
4326}
4327
4328extern __inline __m128d
4329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4330_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4331 __m128d __C)
4332{
4333 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4334 (__v2df) __B,
4335 (__v2df) __C,
4336 (__mmask8)
4337 __U);
4338}
4339
4340extern __inline __m256
4341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4342_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4343 __m256 __C)
4344{
4345 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4346 (__v8sf) __B,
4347 (__v8sf) __C,
4348 (__mmask8) __U);
4349}
4350
4351extern __inline __m256
4352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4353_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4354 __mmask8 __U)
4355{
4356 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4357 (__v8sf) __B,
4358 (__v8sf) __C,
4359 (__mmask8) __U);
4360}
4361
4362extern __inline __m256
4363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4364_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4365 __m256 __C)
4366{
4367 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4368 (__v8sf) __B,
4369 (__v8sf) __C,
4370 (__mmask8) __U);
4371}
4372
4373extern __inline __m128
4374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4375_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4376{
4377 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4378 (__v4sf) __B,
4379 (__v4sf) __C,
4380 (__mmask8) __U);
4381}
4382
4383extern __inline __m128
4384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4385_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4386 __mmask8 __U)
4387{
4388 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4389 (__v4sf) __B,
4390 (__v4sf) __C,
4391 (__mmask8) __U);
4392}
4393
4394extern __inline __m128
4395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4396_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4397 __m128 __C)
4398{
4399 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4400 (__v4sf) __B,
4401 (__v4sf) __C,
4402 (__mmask8) __U);
4403}
4404
4405extern __inline __m256d
4406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4407_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4408 __m256d __C)
4409{
4410 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4411 (__v4df) __B,
4412 -(__v4df) __C,
4413 (__mmask8) __U);
4414}
4415
4416extern __inline __m256d
4417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4418_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4419 __mmask8 __U)
4420{
4421 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4422 (__v4df) __B,
4423 (__v4df) __C,
4424 (__mmask8)
4425 __U);
4426}
4427
4428extern __inline __m256d
4429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4430_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4431 __m256d __C)
4432{
4433 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4434 (__v4df) __B,
4435 -(__v4df) __C,
4436 (__mmask8)
4437 __U);
4438}
4439
4440extern __inline __m128d
4441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4442_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4443 __m128d __C)
4444{
4445 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4446 (__v2df) __B,
4447 -(__v2df) __C,
4448 (__mmask8) __U);
4449}
4450
4451extern __inline __m128d
4452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4453_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4454 __mmask8 __U)
4455{
4456 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4457 (__v2df) __B,
4458 (__v2df) __C,
4459 (__mmask8)
4460 __U);
4461}
4462
4463extern __inline __m128d
4464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4465_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4466 __m128d __C)
4467{
4468 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4469 (__v2df) __B,
4470 -(__v2df) __C,
4471 (__mmask8)
4472 __U);
4473}
4474
4475extern __inline __m256
4476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4477_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4478 __m256 __C)
4479{
4480 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4481 (__v8sf) __B,
4482 -(__v8sf) __C,
4483 (__mmask8) __U);
4484}
4485
4486extern __inline __m256
4487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4489 __mmask8 __U)
4490{
4491 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4492 (__v8sf) __B,
4493 (__v8sf) __C,
4494 (__mmask8) __U);
4495}
4496
4497extern __inline __m256
4498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4499_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4500 __m256 __C)
4501{
4502 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4503 (__v8sf) __B,
4504 -(__v8sf) __C,
4505 (__mmask8) __U);
4506}
4507
4508extern __inline __m128
4509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4510_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4511{
4512 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4513 (__v4sf) __B,
4514 -(__v4sf) __C,
4515 (__mmask8) __U);
4516}
4517
4518extern __inline __m128
4519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4520_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4521 __mmask8 __U)
4522{
4523 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4524 (__v4sf) __B,
4525 (__v4sf) __C,
4526 (__mmask8) __U);
4527}
4528
4529extern __inline __m128
4530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4531_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4532 __m128 __C)
4533{
4534 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4535 (__v4sf) __B,
4536 -(__v4sf) __C,
4537 (__mmask8) __U);
4538}
4539
4540extern __inline __m256d
4541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4542_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4543 __m256d __C)
4544{
4545 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4546 (__v4df) __B,
4547 (__v4df) __C,
4548 (__mmask8) __U);
4549}
4550
4551extern __inline __m256d
4552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4553_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4554 __mmask8 __U)
4555{
5ca94977
L
4556 return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A,
4557 (__v4df) __B,
4558 (__v4df) __C,
4559 (__mmask8) __U);
936c0fe4
AI
4560}
4561
4562extern __inline __m256d
4563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4564_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4565 __m256d __C)
4566{
5ca94977
L
4567 return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A,
4568 (__v4df) __B,
4569 (__v4df) __C,
4570 (__mmask8) __U);
936c0fe4
AI
4571}
4572
4573extern __inline __m128d
4574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4575_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4576 __m128d __C)
4577{
4578 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4579 (__v2df) __B,
4580 (__v2df) __C,
4581 (__mmask8) __U);
4582}
4583
4584extern __inline __m128d
4585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4586_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4587 __mmask8 __U)
4588{
5ca94977
L
4589 return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A,
4590 (__v2df) __B,
4591 (__v2df) __C,
4592 (__mmask8) __U);
936c0fe4
AI
4593}
4594
4595extern __inline __m128d
4596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4597_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4598 __m128d __C)
4599{
5ca94977
L
4600 return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A,
4601 (__v2df) __B,
4602 (__v2df) __C,
4603 (__mmask8) __U);
936c0fe4
AI
4604}
4605
4606extern __inline __m256
4607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4608_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4609 __m256 __C)
4610{
4611 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4612 (__v8sf) __B,
4613 (__v8sf) __C,
4614 (__mmask8) __U);
4615}
4616
4617extern __inline __m256
4618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4619_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4620 __mmask8 __U)
4621{
5ca94977
L
4622 return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A,
4623 (__v8sf) __B,
4624 (__v8sf) __C,
4625 (__mmask8) __U);
936c0fe4
AI
4626}
4627
4628extern __inline __m256
4629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4630_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4631 __m256 __C)
4632{
5ca94977
L
4633 return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A,
4634 (__v8sf) __B,
4635 (__v8sf) __C,
4636 (__mmask8) __U);
936c0fe4
AI
4637}
4638
4639extern __inline __m128
4640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4641_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4642{
4643 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4644 (__v4sf) __B,
4645 (__v4sf) __C,
4646 (__mmask8) __U);
4647}
4648
4649extern __inline __m128
4650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4651_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4652{
5ca94977
L
4653 return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A,
4654 (__v4sf) __B,
4655 (__v4sf) __C,
4656 (__mmask8) __U);
936c0fe4
AI
4657}
4658
4659extern __inline __m128
4660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4661_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4662{
5ca94977
L
4663 return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A,
4664 (__v4sf) __B,
4665 (__v4sf) __C,
4666 (__mmask8) __U);
936c0fe4
AI
4667}
4668
4669extern __inline __m256d
4670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4671_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4672 __m256d __C)
4673{
4674 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4675 (__v4df) __B,
4676 (__v4df) __C,
4677 (__mmask8) __U);
4678}
4679
4680extern __inline __m256d
4681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4683 __mmask8 __U)
4684{
4685 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4686 (__v4df) __B,
4687 (__v4df) __C,
4688 (__mmask8) __U);
4689}
4690
4691extern __inline __m256d
4692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4693_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4694 __m256d __C)
4695{
38ef6fb1
L
4696 return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
4697 (__v4df) __B,
4698 (__v4df) __C,
4699 (__mmask8) __U);
936c0fe4
AI
4700}
4701
4702extern __inline __m128d
4703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4704_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4705 __m128d __C)
4706{
4707 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4708 (__v2df) __B,
4709 (__v2df) __C,
4710 (__mmask8) __U);
4711}
4712
4713extern __inline __m128d
4714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4715_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4716 __mmask8 __U)
4717{
4718 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4719 (__v2df) __B,
4720 (__v2df) __C,
4721 (__mmask8) __U);
4722}
4723
4724extern __inline __m128d
4725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4726_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4727 __m128d __C)
4728{
38ef6fb1
L
4729 return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
4730 (__v2df) __B,
4731 (__v2df) __C,
4732 (__mmask8) __U);
936c0fe4
AI
4733}
4734
4735extern __inline __m256
4736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4738 __m256 __C)
4739{
4740 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4741 (__v8sf) __B,
4742 (__v8sf) __C,
4743 (__mmask8) __U);
4744}
4745
4746extern __inline __m256
4747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4748_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4749 __mmask8 __U)
4750{
4751 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4752 (__v8sf) __B,
4753 (__v8sf) __C,
4754 (__mmask8) __U);
4755}
4756
4757extern __inline __m256
4758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4759_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4760 __m256 __C)
4761{
38ef6fb1
L
4762 return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
4763 (__v8sf) __B,
4764 (__v8sf) __C,
4765 (__mmask8) __U);
936c0fe4
AI
4766}
4767
4768extern __inline __m128
4769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4770_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4771{
4772 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4773 (__v4sf) __B,
4774 (__v4sf) __C,
4775 (__mmask8) __U);
4776}
4777
4778extern __inline __m128
4779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4780_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4781{
4782 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4783 (__v4sf) __B,
4784 (__v4sf) __C,
4785 (__mmask8) __U);
4786}
4787
4788extern __inline __m128
4789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4790_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4791{
38ef6fb1
L
4792 return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
4793 (__v4sf) __B,
4794 (__v4sf) __C,
4795 (__mmask8) __U);
936c0fe4
AI
4796}
4797
4798extern __inline __m128i
4799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4800_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4801 __m128i __B)
4802{
4803 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4804 (__v4si) __B,
4805 (__v4si) __W,
4806 (__mmask8) __U);
4807}
4808
4809extern __inline __m128i
4810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4811_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4812{
4813 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4814 (__v4si) __B,
4815 (__v4si)
4816 _mm_setzero_si128 (),
4817 (__mmask8) __U);
4818}
4819
4820extern __inline __m256i
4821__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4822_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4823 __m256i __B)
4824{
4825 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4826 (__v8si) __B,
4827 (__v8si) __W,
4828 (__mmask8) __U);
4829}
4830
4831extern __inline __m256i
4832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4834{
4835 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4836 (__v8si) __B,
4837 (__v8si)
4838 _mm256_setzero_si256 (),
4839 (__mmask8) __U);
4840}
4841
4842extern __inline __m128i
4843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4844_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4845 __m128i __B)
4846{
4847 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4848 (__v4si) __B,
4849 (__v4si) __W,
4850 (__mmask8) __U);
4851}
4852
4853extern __inline __m128i
4854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4855_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4856{
4857 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4858 (__v4si) __B,
4859 (__v4si)
4860 _mm_setzero_si128 (),
4861 (__mmask8) __U);
4862}
4863
4864extern __inline __m256i
4865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4866_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4867 __m256i __B)
4868{
4869 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4870 (__v8si) __B,
4871 (__v8si) __W,
4872 (__mmask8) __U);
4873}
4874
4875extern __inline __m256i
4876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4877_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4878{
4879 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4880 (__v8si) __B,
4881 (__v8si)
4882 _mm256_setzero_si256 (),
4883 (__mmask8) __U);
4884}
4885
01fd9f8d
L
4886extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4887_mm256_or_epi32 (__m256i __A, __m256i __B)
4888{
4889 return (__m256i) ((__v8su)__A | (__v8su)__B);
4890}
4891
936c0fe4
AI
4892extern __inline __m128i
4893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4895{
4896 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4897 (__v4si) __B,
4898 (__v4si) __W,
4899 (__mmask8) __U);
4900}
4901
4902extern __inline __m128i
4903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4905{
4906 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4907 (__v4si) __B,
4908 (__v4si)
4909 _mm_setzero_si128 (),
4910 (__mmask8) __U);
4911}
4912
01fd9f8d
L
4913extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4914_mm_or_epi32 (__m128i __A, __m128i __B)
4915{
4916 return (__m128i) ((__v4su)__A | (__v4su)__B);
4917}
4918
936c0fe4
AI
4919extern __inline __m256i
4920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4922 __m256i __B)
4923{
4924 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4925 (__v8si) __B,
4926 (__v8si) __W,
4927 (__mmask8) __U);
4928}
4929
4930extern __inline __m256i
4931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4932_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4933{
4934 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4935 (__v8si) __B,
4936 (__v8si)
4937 _mm256_setzero_si256 (),
4938 (__mmask8) __U);
4939}
4940
01fd9f8d
L
4941extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4942_mm256_xor_epi32 (__m256i __A, __m256i __B)
4943{
4944 return (__m256i) ((__v8su)__A ^ (__v8su)__B);
4945}
4946
936c0fe4
AI
4947extern __inline __m128i
4948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4949_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4950 __m128i __B)
4951{
4952 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4953 (__v4si) __B,
4954 (__v4si) __W,
4955 (__mmask8) __U);
4956}
4957
4958extern __inline __m128i
4959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4960_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4961{
4962 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4963 (__v4si) __B,
4964 (__v4si)
4965 _mm_setzero_si128 (),
4966 (__mmask8) __U);
4967}
4968
01fd9f8d
L
4969extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4970_mm_xor_epi32 (__m128i __A, __m128i __B)
4971{
4972 return (__m128i) ((__v4su)__A ^ (__v4su)__B);
4973}
4974
936c0fe4
AI
4975extern __inline __m128
4976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4978{
4979 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4980 (__v4sf) __W,
4981 (__mmask8) __U);
4982}
4983
4984extern __inline __m128
4985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4986_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4987{
4988 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4989 (__v4sf)
4990 _mm_setzero_ps (),
4991 (__mmask8) __U);
4992}
4993
4994extern __inline __m128
4995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4997{
4998 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4999 (__v4sf) __W,
5000 (__mmask8) __U);
5001}
5002
5003extern __inline __m128
5004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5005_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
5006{
5007 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
5008 (__v4sf)
5009 _mm_setzero_ps (),
5010 (__mmask8) __U);
5011}
5012
5013extern __inline __m256i
5014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5015_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
5016{
5017 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
5018 (__v8si) __W,
5019 (__mmask8) __U);
5020}
5021
5022extern __inline __m256i
5023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5024_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
5025{
5026 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
5027 (__v8si)
5028 _mm256_setzero_si256 (),
5029 (__mmask8) __U);
5030}
5031
5032extern __inline __m128i
5033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5034_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
5035{
5036 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5037 (__v4si) __W,
5038 (__mmask8) __U);
5039}
5040
5041extern __inline __m128i
5042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5043_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
5044{
5045 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5046 (__v4si)
5047 _mm_setzero_si128 (),
5048 (__mmask8) __U);
5049}
5050
5051extern __inline __m256i
5052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5053_mm256_cvtps_epu32 (__m256 __A)
5054{
5055 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5056 (__v8si)
5057 _mm256_setzero_si256 (),
5058 (__mmask8) -1);
5059}
5060
5061extern __inline __m256i
5062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5063_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5064{
5065 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5066 (__v8si) __W,
5067 (__mmask8) __U);
5068}
5069
5070extern __inline __m256i
5071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5072_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5073{
5074 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5075 (__v8si)
5076 _mm256_setzero_si256 (),
5077 (__mmask8) __U);
5078}
5079
5080extern __inline __m128i
5081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5082_mm_cvtps_epu32 (__m128 __A)
5083{
5084 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5085 (__v4si)
5086 _mm_setzero_si128 (),
5087 (__mmask8) -1);
5088}
5089
5090extern __inline __m128i
5091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5093{
5094 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5095 (__v4si) __W,
5096 (__mmask8) __U);
5097}
5098
5099extern __inline __m128i
5100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5101_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5102{
5103 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5104 (__v4si)
5105 _mm_setzero_si128 (),
5106 (__mmask8) __U);
5107}
5108
5109extern __inline __m256d
5110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5112{
5113 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5114 (__v4df) __W,
5115 (__mmask8) __U);
5116}
5117
5118extern __inline __m256d
5119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5120_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5121{
5122 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5123 (__v4df)
5124 _mm256_setzero_pd (),
5125 (__mmask8) __U);
5126}
5127
5128extern __inline __m128d
5129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5130_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5131{
5132 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5133 (__v2df) __W,
5134 (__mmask8) __U);
5135}
5136
5137extern __inline __m128d
5138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5139_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5140{
5141 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5142 (__v2df)
5143 _mm_setzero_pd (),
5144 (__mmask8) __U);
5145}
5146
5147extern __inline __m256
5148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5150{
5151 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5152 (__v8sf) __W,
5153 (__mmask8) __U);
5154}
5155
5156extern __inline __m256
5157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5158_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5159{
5160 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5161 (__v8sf)
5162 _mm256_setzero_ps (),
5163 (__mmask8) __U);
5164}
5165
5166extern __inline __m128
5167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5169{
5170 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5171 (__v4sf) __W,
5172 (__mmask8) __U);
5173}
5174
5175extern __inline __m128
5176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5177_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5178{
5179 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5180 (__v4sf)
5181 _mm_setzero_ps (),
5182 (__mmask8) __U);
5183}
5184
5185extern __inline __m256
5186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5188{
5189 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5190 (__v8sf) __W,
5191 (__mmask8) __U);
5192}
5193
5194extern __inline __m256
5195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5196_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5197{
5198 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5199 (__v8sf)
5200 _mm256_setzero_ps (),
5201 (__mmask8) __U);
5202}
5203
5204extern __inline __m128
5205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5207{
5208 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5209 (__v4sf) __W,
5210 (__mmask8) __U);
5211}
5212
5213extern __inline __m128
5214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5215_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5216{
5217 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5218 (__v4sf)
5219 _mm_setzero_ps (),
5220 (__mmask8) __U);
5221}
5222
5223extern __inline __m128i
5224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5226 __m128i __B)
5227{
5228 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5229 (__v4si) __B,
5230 (__v4si) __W,
5231 (__mmask8) __U);
5232}
5233
5234extern __inline __m128i
5235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5236_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5237{
5238 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5239 (__v4si) __B,
5240 (__v4si)
5241 _mm_setzero_si128 (),
5242 (__mmask8) __U);
5243}
5244
5245extern __inline __m256i
5246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5247_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5248 __m256i __B)
5249{
5250 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5251 (__v8si) __B,
5252 (__v8si) __W,
5253 (__mmask8) __U);
5254}
5255
5256extern __inline __m256i
5257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5258_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5259{
5260 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5261 (__v8si) __B,
5262 (__v8si)
5263 _mm256_setzero_si256 (),
5264 (__mmask8) __U);
5265}
5266
5267extern __inline __m128i
5268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5269_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5270 __m128i __B)
5271{
5272 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5273 (__v2di) __B,
5274 (__v2di) __W,
5275 (__mmask8) __U);
5276}
5277
5278extern __inline __m128i
5279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5280_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5281{
5282 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5283 (__v2di) __B,
5284 (__v2di)
a25a7887 5285 _mm_setzero_si128 (),
936c0fe4
AI
5286 (__mmask8) __U);
5287}
5288
5289extern __inline __m256i
5290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5291_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5292 __m256i __B)
5293{
5294 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5295 (__v4di) __B,
5296 (__v4di) __W,
5297 (__mmask8) __U);
5298}
5299
5300extern __inline __m256i
5301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5302_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5303{
5304 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5305 (__v4di) __B,
5306 (__v4di)
5307 _mm256_setzero_si256 (),
5308 (__mmask8) __U);
5309}
5310
5311extern __inline __m128i
5312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5313_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5314 __m128i __B)
5315{
5316 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5317 (__v4si) __B,
5318 (__v4si) __W,
5319 (__mmask8) __U);
5320}
5321
5322extern __inline __m128i
5323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5324_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5325{
5326 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5327 (__v4si) __B,
5328 (__v4si)
5329 _mm_setzero_si128 (),
5330 (__mmask8) __U);
5331}
5332
5333extern __inline __m256i
5334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5335_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5336 __m256i __B)
5337{
5338 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5339 (__v8si) __B,
5340 (__v8si) __W,
5341 (__mmask8) __U);
5342}
5343
5344extern __inline __m256i
5345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5346_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5347{
5348 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5349 (__v8si) __B,
5350 (__v8si)
5351 _mm256_setzero_si256 (),
5352 (__mmask8) __U);
5353}
5354
5355extern __inline __m128i
5356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5357_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5358 __m128i __B)
5359{
5360 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5361 (__v2di) __B,
5362 (__v2di) __W,
5363 (__mmask8) __U);
5364}
5365
5366extern __inline __m128i
5367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5368_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5369{
5370 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5371 (__v2di) __B,
5372 (__v2di)
a25a7887 5373 _mm_setzero_si128 (),
936c0fe4
AI
5374 (__mmask8) __U);
5375}
5376
5377extern __inline __m256i
5378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5379_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5380 __m256i __B)
5381{
5382 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5383 (__v4di) __B,
5384 (__v4di) __W,
5385 (__mmask8) __U);
5386}
5387
5388extern __inline __m256i
5389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5390_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5391{
5392 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5393 (__v4di) __B,
5394 (__v4di)
5395 _mm256_setzero_si256 (),
5396 (__mmask8) __U);
5397}
5398
eee5d6f5
AI
5399extern __inline __mmask8
5400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5401_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5402{
5403 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5404 (__v4si) __B, 0,
5405 (__mmask8) -1);
5406}
5407
936c0fe4
AI
5408extern __inline __mmask8
5409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5410_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5411{
5412 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5413 (__v4si) __B,
5414 (__mmask8) -1);
5415}
5416
eee5d6f5
AI
5417extern __inline __mmask8
5418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5419_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5420{
5421 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5422 (__v4si) __B, 0, __U);
5423}
5424
936c0fe4
AI
5425extern __inline __mmask8
5426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5427_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5428{
5429 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5430 (__v4si) __B, __U);
5431}
5432
eee5d6f5
AI
5433extern __inline __mmask8
5434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5435_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5436{
5437 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5438 (__v8si) __B, 0,
5439 (__mmask8) -1);
5440}
5441
936c0fe4
AI
5442extern __inline __mmask8
5443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5444_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5445{
5446 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5447 (__v8si) __B,
5448 (__mmask8) -1);
5449}
5450
eee5d6f5
AI
5451extern __inline __mmask8
5452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5453_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5454{
5455 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5456 (__v8si) __B, 0, __U);
5457}
5458
936c0fe4
AI
5459extern __inline __mmask8
5460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5461_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5462{
5463 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5464 (__v8si) __B, __U);
5465}
5466
eee5d6f5
AI
5467extern __inline __mmask8
5468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5469_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5470{
5471 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5472 (__v2di) __B, 0,
5473 (__mmask8) -1);
5474}
5475
936c0fe4
AI
5476extern __inline __mmask8
5477__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5478_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5479{
5480 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5481 (__v2di) __B,
5482 (__mmask8) -1);
5483}
5484
eee5d6f5
AI
5485extern __inline __mmask8
5486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5487_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5488{
5489 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5490 (__v2di) __B, 0, __U);
5491}
5492
936c0fe4
AI
5493extern __inline __mmask8
5494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5495_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5496{
5497 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5498 (__v2di) __B, __U);
5499}
5500
eee5d6f5
AI
5501extern __inline __mmask8
5502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5503_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5504{
5505 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5506 (__v4di) __B, 0,
5507 (__mmask8) -1);
5508}
5509
936c0fe4
AI
5510extern __inline __mmask8
5511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5512_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5513{
5514 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5515 (__v4di) __B,
5516 (__mmask8) -1);
5517}
5518
eee5d6f5
AI
5519extern __inline __mmask8
5520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5521_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5522{
5523 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5524 (__v4di) __B, 0, __U);
5525}
5526
936c0fe4
AI
5527extern __inline __mmask8
5528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5529_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5530{
5531 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5532 (__v4di) __B, __U);
5533}
5534
eee5d6f5
AI
5535extern __inline __mmask8
5536__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5537_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5538{
5539 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5540 (__v4si) __B, 6,
5541 (__mmask8) -1);
5542}
5543
936c0fe4
AI
5544extern __inline __mmask8
5545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5546_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5547{
5548 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5549 (__v4si) __B,
5550 (__mmask8) -1);
5551}
5552
eee5d6f5
AI
5553extern __inline __mmask8
5554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5555_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5556{
5557 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5558 (__v4si) __B, 6, __U);
5559}
5560
936c0fe4
AI
5561extern __inline __mmask8
5562__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5563_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5564{
5565 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5566 (__v4si) __B, __U);
5567}
5568
eee5d6f5
AI
5569extern __inline __mmask8
5570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5571_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5572{
5573 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5574 (__v8si) __B, 6,
5575 (__mmask8) -1);
5576}
5577
936c0fe4
AI
5578extern __inline __mmask8
5579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5580_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5581{
5582 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5583 (__v8si) __B,
5584 (__mmask8) -1);
5585}
5586
eee5d6f5
AI
5587extern __inline __mmask8
5588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5589_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5590{
5591 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5592 (__v8si) __B, 6, __U);
5593}
5594
936c0fe4
AI
5595extern __inline __mmask8
5596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5597_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5598{
5599 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5600 (__v8si) __B, __U);
5601}
5602
eee5d6f5
AI
5603extern __inline __mmask8
5604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5605_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5606{
5607 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5608 (__v2di) __B, 6,
5609 (__mmask8) -1);
5610}
5611
936c0fe4
AI
5612extern __inline __mmask8
5613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5615{
5616 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5617 (__v2di) __B,
5618 (__mmask8) -1);
5619}
5620
eee5d6f5
AI
5621extern __inline __mmask8
5622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5623_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5624{
5625 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5626 (__v2di) __B, 6, __U);
5627}
5628
936c0fe4
AI
5629extern __inline __mmask8
5630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5631_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5632{
5633 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5634 (__v2di) __B, __U);
5635}
5636
eee5d6f5
AI
5637extern __inline __mmask8
5638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5639_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5640{
5641 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5642 (__v4di) __B, 6,
5643 (__mmask8) -1);
5644}
5645
936c0fe4
AI
5646extern __inline __mmask8
5647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5648_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5649{
5650 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5651 (__v4di) __B,
5652 (__mmask8) -1);
5653}
5654
eee5d6f5
AI
5655extern __inline __mmask8
5656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5657_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5658{
5659 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5660 (__v4di) __B, 6, __U);
5661}
5662
936c0fe4
AI
5663extern __inline __mmask8
5664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5665_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5666{
5667 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5668 (__v4di) __B, __U);
5669}
5670
5671extern __inline __mmask8
5672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5673_mm_test_epi32_mask (__m128i __A, __m128i __B)
5674{
5675 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5676 (__v4si) __B,
5677 (__mmask8) -1);
5678}
5679
5680extern __inline __mmask8
5681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5682_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5683{
5684 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5685 (__v4si) __B, __U);
5686}
5687
5688extern __inline __mmask8
5689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5690_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5691{
5692 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5693 (__v8si) __B,
5694 (__mmask8) -1);
5695}
5696
5697extern __inline __mmask8
5698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5699_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5700{
5701 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5702 (__v8si) __B, __U);
5703}
5704
5705extern __inline __mmask8
5706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5707_mm_test_epi64_mask (__m128i __A, __m128i __B)
5708{
5709 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5710 (__v2di) __B,
5711 (__mmask8) -1);
5712}
5713
5714extern __inline __mmask8
5715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5716_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5717{
5718 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5719 (__v2di) __B, __U);
5720}
5721
5722extern __inline __mmask8
5723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5724_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5725{
5726 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5727 (__v4di) __B,
5728 (__mmask8) -1);
5729}
5730
5731extern __inline __mmask8
5732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5733_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5734{
5735 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5736 (__v4di) __B, __U);
5737}
5738
5739extern __inline __mmask8
5740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5741_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5742{
5743 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5744 (__v4si) __B,
5745 (__mmask8) -1);
5746}
5747
5748extern __inline __mmask8
5749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5750_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5751{
5752 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5753 (__v4si) __B, __U);
5754}
5755
5756extern __inline __mmask8
5757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5758_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5759{
5760 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5761 (__v8si) __B,
5762 (__mmask8) -1);
5763}
5764
5765extern __inline __mmask8
5766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5767_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5768{
5769 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5770 (__v8si) __B, __U);
5771}
5772
5773extern __inline __mmask8
5774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5775_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5776{
5777 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5778 (__v2di) __B,
5779 (__mmask8) -1);
5780}
5781
5782extern __inline __mmask8
5783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5784_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5785{
5786 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5787 (__v2di) __B, __U);
5788}
5789
5790extern __inline __mmask8
5791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5792_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5793{
5794 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5795 (__v4di) __B,
5796 (__mmask8) -1);
5797}
5798
5799extern __inline __mmask8
5800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5801_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5802{
5803 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5804 (__v4di) __B, __U);
5805}
5806
5807extern __inline __m256d
5808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5810{
5811 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5812 (__v4df) __W,
5813 (__mmask8) __U);
5814}
5815
5816extern __inline __m256d
5817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5819{
5820 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5821 (__v4df)
5822 _mm256_setzero_pd (),
5823 (__mmask8) __U);
5824}
5825
5826extern __inline void
5827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5828_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5829{
5830 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5831 (__v4df) __A,
5832 (__mmask8) __U);
5833}
5834
5835extern __inline __m128d
5836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5838{
5839 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5840 (__v2df) __W,
5841 (__mmask8) __U);
5842}
5843
5844extern __inline __m128d
5845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5847{
5848 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5849 (__v2df)
5850 _mm_setzero_pd (),
5851 (__mmask8) __U);
5852}
5853
5854extern __inline void
5855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5857{
5858 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5859 (__v2df) __A,
5860 (__mmask8) __U);
5861}
5862
5863extern __inline __m256
5864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5865_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5866{
5867 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5868 (__v8sf) __W,
5869 (__mmask8) __U);
5870}
5871
5872extern __inline __m256
5873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5874_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5875{
5876 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5877 (__v8sf)
5878 _mm256_setzero_ps (),
5879 (__mmask8) __U);
5880}
5881
5882extern __inline void
5883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5884_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5885{
5886 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5887 (__v8sf) __A,
5888 (__mmask8) __U);
5889}
5890
5891extern __inline __m128
5892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5893_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5894{
5895 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5896 (__v4sf) __W,
5897 (__mmask8) __U);
5898}
5899
5900extern __inline __m128
5901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5902_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5903{
5904 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5905 (__v4sf)
5906 _mm_setzero_ps (),
5907 (__mmask8) __U);
5908}
5909
5910extern __inline void
5911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5912_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5913{
5914 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5915 (__v4sf) __A,
5916 (__mmask8) __U);
5917}
5918
5919extern __inline __m256i
5920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5922{
5923 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5924 (__v4di) __W,
5925 (__mmask8) __U);
5926}
5927
5928extern __inline __m256i
5929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5931{
5932 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5933 (__v4di)
5934 _mm256_setzero_si256 (),
5935 (__mmask8) __U);
5936}
5937
5938extern __inline void
5939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5941{
5942 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5943 (__v4di) __A,
5944 (__mmask8) __U);
5945}
5946
5947extern __inline __m128i
5948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5950{
5951 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5952 (__v2di) __W,
5953 (__mmask8) __U);
5954}
5955
5956extern __inline __m128i
5957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5958_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5959{
5960 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5961 (__v2di)
a25a7887 5962 _mm_setzero_si128 (),
936c0fe4
AI
5963 (__mmask8) __U);
5964}
5965
5966extern __inline void
5967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5969{
5970 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5971 (__v2di) __A,
5972 (__mmask8) __U);
5973}
5974
5975extern __inline __m256i
5976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5977_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5978{
5979 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5980 (__v8si) __W,
5981 (__mmask8) __U);
5982}
5983
5984extern __inline __m256i
5985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5987{
5988 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5989 (__v8si)
5990 _mm256_setzero_si256 (),
5991 (__mmask8) __U);
5992}
5993
5994extern __inline void
5995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5997{
5998 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5999 (__v8si) __A,
6000 (__mmask8) __U);
6001}
6002
6003extern __inline __m128i
6004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6005_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6006{
6007 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
6008 (__v4si) __W,
6009 (__mmask8) __U);
6010}
6011
6012extern __inline __m128i
6013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6014_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
6015{
6016 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
6017 (__v4si)
6018 _mm_setzero_si128 (),
6019 (__mmask8) __U);
6020}
6021
6022extern __inline void
6023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6024_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
6025{
6026 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
6027 (__v4si) __A,
6028 (__mmask8) __U);
6029}
6030
6031extern __inline __m256d
6032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6033_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
6034{
6035 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
6036 (__v4df) __W,
6037 (__mmask8) __U);
6038}
6039
6040extern __inline __m256d
6041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6042_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
6043{
6044 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
6045 (__v4df)
6046 _mm256_setzero_pd (),
6047 (__mmask8) __U);
6048}
6049
6050extern __inline __m256d
6051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6052_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6053{
6054 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
6055 (__v4df) __W,
6056 (__mmask8)
6057 __U);
6058}
6059
6060extern __inline __m256d
6061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6062_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6063{
6064 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6065 (__v4df)
6066 _mm256_setzero_pd (),
6067 (__mmask8)
6068 __U);
6069}
6070
6071extern __inline __m128d
6072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6074{
6075 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6076 (__v2df) __W,
6077 (__mmask8) __U);
6078}
6079
6080extern __inline __m128d
6081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6082_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6083{
6084 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6085 (__v2df)
6086 _mm_setzero_pd (),
6087 (__mmask8) __U);
6088}
6089
6090extern __inline __m128d
6091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6093{
6094 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6095 (__v2df) __W,
6096 (__mmask8)
6097 __U);
6098}
6099
6100extern __inline __m128d
6101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6102_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6103{
6104 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6105 (__v2df)
6106 _mm_setzero_pd (),
6107 (__mmask8)
6108 __U);
6109}
6110
6111extern __inline __m256
6112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6113_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6114{
6115 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6116 (__v8sf) __W,
6117 (__mmask8) __U);
6118}
6119
6120extern __inline __m256
6121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6122_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6123{
6124 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6125 (__v8sf)
6126 _mm256_setzero_ps (),
6127 (__mmask8) __U);
6128}
6129
6130extern __inline __m256
6131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6132_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6133{
6134 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6135 (__v8sf) __W,
6136 (__mmask8) __U);
6137}
6138
6139extern __inline __m256
6140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6141_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6142{
6143 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6144 (__v8sf)
6145 _mm256_setzero_ps (),
6146 (__mmask8)
6147 __U);
6148}
6149
6150extern __inline __m128
6151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6153{
6154 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6155 (__v4sf) __W,
6156 (__mmask8) __U);
6157}
6158
6159extern __inline __m128
6160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6161_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6162{
6163 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6164 (__v4sf)
6165 _mm_setzero_ps (),
6166 (__mmask8) __U);
6167}
6168
6169extern __inline __m128
6170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6171_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6172{
6173 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6174 (__v4sf) __W,
6175 (__mmask8) __U);
6176}
6177
6178extern __inline __m128
6179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6180_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6181{
6182 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6183 (__v4sf)
6184 _mm_setzero_ps (),
6185 (__mmask8)
6186 __U);
6187}
6188
6189extern __inline __m256i
6190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6191_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6192{
6193 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6194 (__v4di) __W,
6195 (__mmask8) __U);
6196}
6197
6198extern __inline __m256i
6199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6200_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6201{
6202 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6203 (__v4di)
6204 _mm256_setzero_si256 (),
6205 (__mmask8) __U);
6206}
6207
6208extern __inline __m256i
6209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6210_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6211 void const *__P)
6212{
6213 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6214 (__v4di) __W,
6215 (__mmask8)
6216 __U);
6217}
6218
6219extern __inline __m256i
6220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6221_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6222{
6223 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6224 (__v4di)
6225 _mm256_setzero_si256 (),
6226 (__mmask8)
6227 __U);
6228}
6229
6230extern __inline __m128i
6231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6232_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6233{
6234 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6235 (__v2di) __W,
6236 (__mmask8) __U);
6237}
6238
6239extern __inline __m128i
6240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6241_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6242{
6243 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6244 (__v2di)
6245 _mm_setzero_si128 (),
6246 (__mmask8) __U);
6247}
6248
6249extern __inline __m128i
6250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6251_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6252{
6253 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6254 (__v2di) __W,
6255 (__mmask8)
6256 __U);
6257}
6258
6259extern __inline __m128i
6260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6261_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6262{
6263 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6264 (__v2di)
6265 _mm_setzero_si128 (),
6266 (__mmask8)
6267 __U);
6268}
6269
6270extern __inline __m256i
6271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6272_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6273{
6274 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6275 (__v8si) __W,
6276 (__mmask8) __U);
6277}
6278
6279extern __inline __m256i
6280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6281_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6282{
6283 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6284 (__v8si)
6285 _mm256_setzero_si256 (),
6286 (__mmask8) __U);
6287}
6288
6289extern __inline __m256i
6290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6291_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6292 void const *__P)
6293{
6294 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6295 (__v8si) __W,
6296 (__mmask8)
6297 __U);
6298}
6299
6300extern __inline __m256i
6301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6302_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6303{
6304 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6305 (__v8si)
6306 _mm256_setzero_si256 (),
6307 (__mmask8)
6308 __U);
6309}
6310
6311extern __inline __m128i
6312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6313_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6314{
6315 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6316 (__v4si) __W,
6317 (__mmask8) __U);
6318}
6319
6320extern __inline __m128i
6321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6322_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6323{
6324 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6325 (__v4si)
6326 _mm_setzero_si128 (),
6327 (__mmask8) __U);
6328}
6329
6330extern __inline __m128i
6331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6332_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6333{
6334 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6335 (__v4si) __W,
6336 (__mmask8)
6337 __U);
6338}
6339
6340extern __inline __m128i
6341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6342_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6343{
6344 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6345 (__v4si)
6346 _mm_setzero_si128 (),
6347 (__mmask8)
6348 __U);
6349}
6350
6351extern __inline __m256d
6352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6353_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6354{
6355 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6356 /* idx */ ,
6357 (__v4df) __A,
6358 (__v4df) __B,
c42b0bdf 6359 (__mmask8) -1);
936c0fe4
AI
6360}
6361
6362extern __inline __m256d
6363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6365 __m256d __B)
6366{
6367 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6368 /* idx */ ,
6369 (__v4df) __A,
6370 (__v4df) __B,
6371 (__mmask8)
6372 __U);
6373}
6374
6375extern __inline __m256d
6376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6377_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6378 __m256d __B)
6379{
6380 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6381 (__v4di) __I
6382 /* idx */ ,
6383 (__v4df) __B,
6384 (__mmask8)
6385 __U);
6386}
6387
6388extern __inline __m256d
6389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6390_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6391 __m256d __B)
6392{
6393 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6394 /* idx */ ,
6395 (__v4df) __A,
6396 (__v4df) __B,
6397 (__mmask8)
6398 __U);
6399}
6400
6401extern __inline __m256
6402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6403_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6404{
6405 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6406 /* idx */ ,
6407 (__v8sf) __A,
6408 (__v8sf) __B,
6409 (__mmask8) -1);
6410}
6411
6412extern __inline __m256
6413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6414_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6415 __m256 __B)
6416{
6417 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6418 /* idx */ ,
6419 (__v8sf) __A,
6420 (__v8sf) __B,
6421 (__mmask8) __U);
6422}
6423
6424extern __inline __m256
6425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6426_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6427 __m256 __B)
6428{
6429 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6430 (__v8si) __I
6431 /* idx */ ,
6432 (__v8sf) __B,
6433 (__mmask8) __U);
6434}
6435
6436extern __inline __m256
6437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6438_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6439 __m256 __B)
6440{
6441 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6442 /* idx */ ,
6443 (__v8sf) __A,
6444 (__v8sf) __B,
6445 (__mmask8)
6446 __U);
6447}
6448
6449extern __inline __m128i
6450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6451_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6452{
6453 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6454 /* idx */ ,
6455 (__v2di) __A,
6456 (__v2di) __B,
6457 (__mmask8) -1);
6458}
6459
6460extern __inline __m128i
6461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6462_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6463 __m128i __B)
6464{
6465 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6466 /* idx */ ,
6467 (__v2di) __A,
6468 (__v2di) __B,
6469 (__mmask8) __U);
6470}
6471
6472extern __inline __m128i
6473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6474_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6475 __m128i __B)
6476{
6477 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6478 (__v2di) __I
6479 /* idx */ ,
6480 (__v2di) __B,
6481 (__mmask8) __U);
6482}
6483
6484extern __inline __m128i
6485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6486_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6487 __m128i __B)
6488{
6489 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6490 /* idx */ ,
6491 (__v2di) __A,
6492 (__v2di) __B,
6493 (__mmask8)
6494 __U);
6495}
6496
6497extern __inline __m128i
6498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6499_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6500{
6501 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6502 /* idx */ ,
6503 (__v4si) __A,
6504 (__v4si) __B,
6505 (__mmask8) -1);
6506}
6507
6508extern __inline __m128i
6509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6510_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6511 __m128i __B)
6512{
6513 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6514 /* idx */ ,
6515 (__v4si) __A,
6516 (__v4si) __B,
6517 (__mmask8) __U);
6518}
6519
6520extern __inline __m128i
6521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6522_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6523 __m128i __B)
6524{
6525 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6526 (__v4si) __I
6527 /* idx */ ,
6528 (__v4si) __B,
6529 (__mmask8) __U);
6530}
6531
6532extern __inline __m128i
6533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6534_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6535 __m128i __B)
6536{
6537 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6538 /* idx */ ,
6539 (__v4si) __A,
6540 (__v4si) __B,
6541 (__mmask8)
6542 __U);
6543}
6544
6545extern __inline __m256i
6546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6547_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6548{
6549 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6550 /* idx */ ,
6551 (__v4di) __A,
6552 (__v4di) __B,
6553 (__mmask8) -1);
6554}
6555
6556extern __inline __m256i
6557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6558_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6559 __m256i __B)
6560{
6561 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6562 /* idx */ ,
6563 (__v4di) __A,
6564 (__v4di) __B,
6565 (__mmask8) __U);
6566}
6567
6568extern __inline __m256i
6569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6570_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6571 __mmask8 __U, __m256i __B)
6572{
6573 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6574 (__v4di) __I
6575 /* idx */ ,
6576 (__v4di) __B,
6577 (__mmask8) __U);
6578}
6579
6580extern __inline __m256i
6581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6582_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6583 __m256i __I, __m256i __B)
6584{
6585 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6586 /* idx */ ,
6587 (__v4di) __A,
6588 (__v4di) __B,
6589 (__mmask8)
6590 __U);
6591}
6592
6593extern __inline __m256i
6594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6595_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6596{
6597 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6598 /* idx */ ,
6599 (__v8si) __A,
6600 (__v8si) __B,
6601 (__mmask8) -1);
6602}
6603
6604extern __inline __m256i
6605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6606_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6607 __m256i __B)
6608{
6609 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6610 /* idx */ ,
6611 (__v8si) __A,
6612 (__v8si) __B,
6613 (__mmask8) __U);
6614}
6615
6616extern __inline __m256i
6617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6618_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6619 __mmask8 __U, __m256i __B)
6620{
6621 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6622 (__v8si) __I
6623 /* idx */ ,
6624 (__v8si) __B,
6625 (__mmask8) __U);
6626}
6627
6628extern __inline __m256i
6629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6630_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6631 __m256i __I, __m256i __B)
6632{
6633 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6634 /* idx */ ,
6635 (__v8si) __A,
6636 (__v8si) __B,
6637 (__mmask8)
6638 __U);
6639}
6640
6641extern __inline __m128d
6642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6643_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6644{
6645 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6646 /* idx */ ,
6647 (__v2df) __A,
6648 (__v2df) __B,
c42b0bdf 6649 (__mmask8) -1);
936c0fe4
AI
6650}
6651
6652extern __inline __m128d
6653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6654_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6655 __m128d __B)
6656{
6657 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6658 /* idx */ ,
6659 (__v2df) __A,
6660 (__v2df) __B,
6661 (__mmask8)
6662 __U);
6663}
6664
6665extern __inline __m128d
6666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6667_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6668 __m128d __B)
6669{
6670 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6671 (__v2di) __I
6672 /* idx */ ,
6673 (__v2df) __B,
6674 (__mmask8)
6675 __U);
6676}
6677
6678extern __inline __m128d
6679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6681 __m128d __B)
6682{
6683 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6684 /* idx */ ,
6685 (__v2df) __A,
6686 (__v2df) __B,
6687 (__mmask8)
6688 __U);
6689}
6690
6691extern __inline __m128
6692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6693_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6694{
6695 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6696 /* idx */ ,
6697 (__v4sf) __A,
6698 (__v4sf) __B,
6699 (__mmask8) -1);
6700}
6701
6702extern __inline __m128
6703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6704_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6705 __m128 __B)
6706{
6707 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6708 /* idx */ ,
6709 (__v4sf) __A,
6710 (__v4sf) __B,
6711 (__mmask8) __U);
6712}
6713
6714extern __inline __m128
6715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6716_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6717 __m128 __B)
6718{
6719 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6720 (__v4si) __I
6721 /* idx */ ,
6722 (__v4sf) __B,
6723 (__mmask8) __U);
6724}
6725
6726extern __inline __m128
6727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6728_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6729 __m128 __B)
6730{
6731 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6732 /* idx */ ,
6733 (__v4sf) __A,
6734 (__v4sf) __B,
6735 (__mmask8)
6736 __U);
6737}
6738
6739extern __inline __m128i
6740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6741_mm_srav_epi64 (__m128i __X, __m128i __Y)
6742{
6743 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6744 (__v2di) __Y,
6745 (__v2di)
a25a7887 6746 _mm_setzero_si128 (),
936c0fe4
AI
6747 (__mmask8) -1);
6748}
6749
6750extern __inline __m128i
6751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6752_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6753 __m128i __Y)
6754{
6755 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6756 (__v2di) __Y,
6757 (__v2di) __W,
6758 (__mmask8) __U);
6759}
6760
6761extern __inline __m128i
6762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6763_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6764{
6765 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6766 (__v2di) __Y,
6767 (__v2di)
a25a7887 6768 _mm_setzero_si128 (),
936c0fe4
AI
6769 (__mmask8) __U);
6770}
6771
6772extern __inline __m256i
6773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6774_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6775 __m256i __Y)
6776{
6777 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6778 (__v8si) __Y,
6779 (__v8si) __W,
6780 (__mmask8) __U);
6781}
6782
6783extern __inline __m256i
6784__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6785_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6786{
6787 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6788 (__v8si) __Y,
6789 (__v8si)
6790 _mm256_setzero_si256 (),
6791 (__mmask8) __U);
6792}
6793
6794extern __inline __m128i
6795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6796_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6797 __m128i __Y)
6798{
6799 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6800 (__v4si) __Y,
6801 (__v4si) __W,
6802 (__mmask8) __U);
6803}
6804
6805extern __inline __m128i
6806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6807_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6808{
6809 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6810 (__v4si) __Y,
6811 (__v4si)
6812 _mm_setzero_si128 (),
6813 (__mmask8) __U);
6814}
6815
6816extern __inline __m256i
6817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6818_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6819 __m256i __Y)
6820{
6821 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6822 (__v4di) __Y,
6823 (__v4di) __W,
6824 (__mmask8) __U);
6825}
6826
6827extern __inline __m256i
6828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6829_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6830{
6831 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6832 (__v4di) __Y,
6833 (__v4di)
6834 _mm256_setzero_si256 (),
6835 (__mmask8) __U);
6836}
6837
6838extern __inline __m128i
6839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6840_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6841 __m128i __Y)
6842{
6843 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6844 (__v2di) __Y,
6845 (__v2di) __W,
6846 (__mmask8) __U);
6847}
6848
6849extern __inline __m128i
6850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6851_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6852{
6853 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6854 (__v2di) __Y,
6855 (__v2di)
a25a7887 6856 _mm_setzero_si128 (),
936c0fe4
AI
6857 (__mmask8) __U);
6858}
6859
6860extern __inline __m256i
6861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6862_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6863 __m256i __Y)
6864{
6865 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6866 (__v8si) __Y,
6867 (__v8si) __W,
6868 (__mmask8) __U);
6869}
6870
6871extern __inline __m256i
6872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6873_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6874{
6875 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6876 (__v8si) __Y,
6877 (__v8si)
6878 _mm256_setzero_si256 (),
6879 (__mmask8) __U);
6880}
6881
6882extern __inline __m128i
6883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6884_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6885 __m128i __Y)
6886{
6887 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6888 (__v4si) __Y,
6889 (__v4si) __W,
6890 (__mmask8) __U);
6891}
6892
6893extern __inline __m128i
6894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6895_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6896{
6897 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6898 (__v4si) __Y,
6899 (__v4si)
6900 _mm_setzero_si128 (),
6901 (__mmask8) __U);
6902}
6903
6904extern __inline __m256i
6905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6906_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6907 __m256i __Y)
6908{
6909 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6910 (__v8si) __Y,
6911 (__v8si) __W,
6912 (__mmask8) __U);
6913}
6914
6915extern __inline __m256i
6916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6917_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6918{
6919 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6920 (__v8si) __Y,
6921 (__v8si)
6922 _mm256_setzero_si256 (),
6923 (__mmask8) __U);
6924}
6925
6926extern __inline __m128i
6927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6928_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6929 __m128i __Y)
6930{
6931 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6932 (__v4si) __Y,
6933 (__v4si) __W,
6934 (__mmask8) __U);
6935}
6936
6937extern __inline __m128i
6938__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6939_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6940{
6941 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6942 (__v4si) __Y,
6943 (__v4si)
6944 _mm_setzero_si128 (),
6945 (__mmask8) __U);
6946}
6947
6948extern __inline __m256i
6949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6950_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6951 __m256i __Y)
6952{
6953 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6954 (__v4di) __Y,
6955 (__v4di) __W,
6956 (__mmask8) __U);
6957}
6958
6959extern __inline __m256i
6960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6961_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6962{
6963 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6964 (__v4di) __Y,
6965 (__v4di)
6966 _mm256_setzero_si256 (),
6967 (__mmask8) __U);
6968}
6969
6970extern __inline __m128i
6971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6972_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6973 __m128i __Y)
6974{
6975 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6976 (__v2di) __Y,
6977 (__v2di) __W,
6978 (__mmask8) __U);
6979}
6980
6981extern __inline __m128i
6982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6983_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6984{
6985 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6986 (__v2di) __Y,
6987 (__v2di)
a25a7887 6988 _mm_setzero_si128 (),
936c0fe4
AI
6989 (__mmask8) __U);
6990}
6991
6992extern __inline __m256i
6993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6994_mm256_rolv_epi32 (__m256i __A, __m256i __B)
6995{
6996 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6997 (__v8si) __B,
6998 (__v8si)
6999 _mm256_setzero_si256 (),
7000 (__mmask8) -1);
7001}
7002
7003extern __inline __m256i
7004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7005_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7006 __m256i __B)
7007{
7008 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
7009 (__v8si) __B,
7010 (__v8si) __W,
7011 (__mmask8) __U);
7012}
7013
7014extern __inline __m256i
7015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7016_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7017{
7018 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
7019 (__v8si) __B,
7020 (__v8si)
7021 _mm256_setzero_si256 (),
7022 (__mmask8) __U);
7023}
7024
7025extern __inline __m128i
7026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7027_mm_rolv_epi32 (__m128i __A, __m128i __B)
7028{
7029 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7030 (__v4si) __B,
7031 (__v4si)
7032 _mm_setzero_si128 (),
7033 (__mmask8) -1);
7034}
7035
7036extern __inline __m128i
7037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7038_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7039 __m128i __B)
7040{
7041 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7042 (__v4si) __B,
7043 (__v4si) __W,
7044 (__mmask8) __U);
7045}
7046
7047extern __inline __m128i
7048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7049_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7050{
7051 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7052 (__v4si) __B,
7053 (__v4si)
7054 _mm_setzero_si128 (),
7055 (__mmask8) __U);
7056}
7057
7058extern __inline __m256i
7059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7060_mm256_rorv_epi32 (__m256i __A, __m256i __B)
7061{
7062 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7063 (__v8si) __B,
7064 (__v8si)
7065 _mm256_setzero_si256 (),
7066 (__mmask8) -1);
7067}
7068
7069extern __inline __m256i
7070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7071_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7072 __m256i __B)
7073{
7074 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7075 (__v8si) __B,
7076 (__v8si) __W,
7077 (__mmask8) __U);
7078}
7079
7080extern __inline __m256i
7081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7082_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7083{
7084 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7085 (__v8si) __B,
7086 (__v8si)
7087 _mm256_setzero_si256 (),
7088 (__mmask8) __U);
7089}
7090
7091extern __inline __m128i
7092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7093_mm_rorv_epi32 (__m128i __A, __m128i __B)
7094{
7095 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7096 (__v4si) __B,
7097 (__v4si)
7098 _mm_setzero_si128 (),
7099 (__mmask8) -1);
7100}
7101
7102extern __inline __m128i
7103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7104_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7105 __m128i __B)
7106{
7107 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7108 (__v4si) __B,
7109 (__v4si) __W,
7110 (__mmask8) __U);
7111}
7112
7113extern __inline __m128i
7114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7115_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7116{
7117 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7118 (__v4si) __B,
7119 (__v4si)
7120 _mm_setzero_si128 (),
7121 (__mmask8) __U);
7122}
7123
7124extern __inline __m256i
7125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7126_mm256_rolv_epi64 (__m256i __A, __m256i __B)
7127{
7128 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7129 (__v4di) __B,
7130 (__v4di)
7131 _mm256_setzero_si256 (),
7132 (__mmask8) -1);
7133}
7134
7135extern __inline __m256i
7136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7137_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7138 __m256i __B)
7139{
7140 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7141 (__v4di) __B,
7142 (__v4di) __W,
7143 (__mmask8) __U);
7144}
7145
7146extern __inline __m256i
7147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7148_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7149{
7150 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7151 (__v4di) __B,
7152 (__v4di)
7153 _mm256_setzero_si256 (),
7154 (__mmask8) __U);
7155}
7156
7157extern __inline __m128i
7158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7159_mm_rolv_epi64 (__m128i __A, __m128i __B)
7160{
7161 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7162 (__v2di) __B,
7163 (__v2di)
a25a7887 7164 _mm_setzero_si128 (),
936c0fe4
AI
7165 (__mmask8) -1);
7166}
7167
7168extern __inline __m128i
7169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7170_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7171 __m128i __B)
7172{
7173 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7174 (__v2di) __B,
7175 (__v2di) __W,
7176 (__mmask8) __U);
7177}
7178
7179extern __inline __m128i
7180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7181_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7182{
7183 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7184 (__v2di) __B,
7185 (__v2di)
a25a7887 7186 _mm_setzero_si128 (),
936c0fe4
AI
7187 (__mmask8) __U);
7188}
7189
7190extern __inline __m256i
7191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7192_mm256_rorv_epi64 (__m256i __A, __m256i __B)
7193{
7194 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7195 (__v4di) __B,
7196 (__v4di)
7197 _mm256_setzero_si256 (),
7198 (__mmask8) -1);
7199}
7200
7201extern __inline __m256i
7202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7203_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7204 __m256i __B)
7205{
7206 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7207 (__v4di) __B,
7208 (__v4di) __W,
7209 (__mmask8) __U);
7210}
7211
7212extern __inline __m256i
7213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7214_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7215{
7216 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7217 (__v4di) __B,
7218 (__v4di)
7219 _mm256_setzero_si256 (),
7220 (__mmask8) __U);
7221}
7222
7223extern __inline __m128i
7224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7225_mm_rorv_epi64 (__m128i __A, __m128i __B)
7226{
7227 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7228 (__v2di) __B,
7229 (__v2di)
a25a7887 7230 _mm_setzero_si128 (),
936c0fe4
AI
7231 (__mmask8) -1);
7232}
7233
7234extern __inline __m128i
7235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7236_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7237 __m128i __B)
7238{
7239 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7240 (__v2di) __B,
7241 (__v2di) __W,
7242 (__mmask8) __U);
7243}
7244
7245extern __inline __m128i
7246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7247_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7248{
7249 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7250 (__v2di) __B,
7251 (__v2di)
a25a7887 7252 _mm_setzero_si128 (),
936c0fe4
AI
7253 (__mmask8) __U);
7254}
7255
7256extern __inline __m256i
7257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7258_mm256_srav_epi64 (__m256i __X, __m256i __Y)
7259{
7260 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7261 (__v4di) __Y,
7262 (__v4di)
7263 _mm256_setzero_si256 (),
7264 (__mmask8) -1);
7265}
7266
7267extern __inline __m256i
7268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7269_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7270 __m256i __Y)
7271{
7272 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7273 (__v4di) __Y,
7274 (__v4di) __W,
7275 (__mmask8) __U);
7276}
7277
7278extern __inline __m256i
7279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7280_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7281{
7282 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7283 (__v4di) __Y,
7284 (__v4di)
7285 _mm256_setzero_si256 (),
7286 (__mmask8) __U);
7287}
7288
7289extern __inline __m256i
7290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7291_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7292 __m256i __B)
7293{
7294 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7295 (__v4di) __B,
7296 (__v4di) __W, __U);
7297}
7298
7299extern __inline __m256i
7300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7301_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7302{
7303 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7304 (__v4di) __B,
7305 (__v4di)
7306 _mm256_setzero_pd (),
7307 __U);
7308}
7309
7310extern __inline __m128i
7311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7312_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7313 __m128i __B)
7314{
7315 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7316 (__v2di) __B,
7317 (__v2di) __W, __U);
7318}
7319
7320extern __inline __m128i
7321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7322_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7323{
7324 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7325 (__v2di) __B,
7326 (__v2di)
7327 _mm_setzero_pd (),
7328 __U);
7329}
7330
7331extern __inline __m256i
7332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7333_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7334 __m256i __B)
7335{
7336 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7337 (__v4di) __B,
7338 (__v4di) __W, __U);
7339}
7340
7341extern __inline __m256i
7342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7343_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7344{
7345 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7346 (__v4di) __B,
7347 (__v4di)
7348 _mm256_setzero_pd (),
7349 __U);
7350}
7351
7352extern __inline __m128i
7353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7354_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7355 __m128i __B)
7356{
7357 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7358 (__v2di) __B,
7359 (__v2di) __W, __U);
7360}
7361
7362extern __inline __m128i
7363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7364_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7365{
7366 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7367 (__v2di) __B,
7368 (__v2di)
7369 _mm_setzero_pd (),
7370 __U);
7371}
7372
7373extern __inline __m256i
7374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7375_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7376 __m256i __B)
7377{
7378 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7379 (__v4di) __B,
7380 (__v4di) __W,
7381 (__mmask8) __U);
7382}
7383
7384extern __inline __m256i
7385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7386_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7387{
7388 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7389 (__v4di) __B,
7390 (__v4di)
7391 _mm256_setzero_si256 (),
7392 (__mmask8) __U);
7393}
7394
01fd9f8d
L
7395extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7396_mm256_or_epi64 (__m256i __A, __m256i __B)
7397{
7398 return (__m256i) ((__v4du)__A | (__v4du)__B);
7399}
7400
936c0fe4
AI
7401extern __inline __m128i
7402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7403_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7404{
7405 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7406 (__v2di) __B,
7407 (__v2di) __W,
7408 (__mmask8) __U);
7409}
7410
7411extern __inline __m128i
7412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7413_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7414{
7415 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7416 (__v2di) __B,
7417 (__v2di)
7418 _mm_setzero_si128 (),
7419 (__mmask8) __U);
7420}
7421
01fd9f8d
L
7422extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7423_mm_or_epi64 (__m128i __A, __m128i __B)
7424{
7425 return (__m128i) ((__v2du)__A | (__v2du)__B);
7426}
7427
936c0fe4
AI
7428extern __inline __m256i
7429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7430_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7431 __m256i __B)
7432{
7433 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7434 (__v4di) __B,
7435 (__v4di) __W,
7436 (__mmask8) __U);
7437}
7438
7439extern __inline __m256i
7440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7442{
7443 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7444 (__v4di) __B,
7445 (__v4di)
7446 _mm256_setzero_si256 (),
7447 (__mmask8) __U);
7448}
7449
01fd9f8d
L
7450extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7451_mm256_xor_epi64 (__m256i __A, __m256i __B)
7452{
7453 return (__m256i) ((__v4du)__A ^ (__v4du)__B);
7454}
7455
936c0fe4
AI
7456extern __inline __m128i
7457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7459 __m128i __B)
7460{
7461 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7462 (__v2di) __B,
7463 (__v2di) __W,
7464 (__mmask8) __U);
7465}
7466
7467extern __inline __m128i
7468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7470{
7471 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7472 (__v2di) __B,
7473 (__v2di)
7474 _mm_setzero_si128 (),
7475 (__mmask8) __U);
7476}
7477
01fd9f8d
L
7478extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7479_mm_xor_epi64 (__m128i __A, __m128i __B)
7480{
7481 return (__m128i) ((__v2du)__A ^ (__v2du)__B);
7482}
7483
936c0fe4
AI
7484extern __inline __m256d
7485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7486_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7487 __m256d __B)
7488{
7489 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7490 (__v4df) __B,
7491 (__v4df) __W,
7492 (__mmask8) __U);
7493}
7494
7495extern __inline __m256d
7496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7497_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7498{
7499 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7500 (__v4df) __B,
7501 (__v4df)
7502 _mm256_setzero_pd (),
7503 (__mmask8) __U);
7504}
7505
7506extern __inline __m256
7507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7509{
7510 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7511 (__v8sf) __B,
7512 (__v8sf) __W,
7513 (__mmask8) __U);
7514}
7515
7516extern __inline __m256
7517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7518_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7519{
7520 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7521 (__v8sf) __B,
7522 (__v8sf)
7523 _mm256_setzero_ps (),
7524 (__mmask8) __U);
7525}
7526
7527extern __inline __m128
7528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7529_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7530{
7531 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7532 (__v4sf) __B,
7533 (__v4sf) __W,
7534 (__mmask8) __U);
7535}
7536
7537extern __inline __m128
7538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7539_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7540{
7541 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7542 (__v4sf) __B,
7543 (__v4sf)
7544 _mm_setzero_ps (),
7545 (__mmask8) __U);
7546}
7547
7548extern __inline __m128d
7549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7550_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7551{
7552 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7553 (__v2df) __B,
7554 (__v2df) __W,
7555 (__mmask8) __U);
7556}
7557
7558extern __inline __m128d
7559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7560_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7561{
7562 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7563 (__v2df) __B,
7564 (__v2df)
7565 _mm_setzero_pd (),
7566 (__mmask8) __U);
7567}
7568
7569extern __inline __m256d
7570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7571_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7572 __m256d __B)
7573{
7574 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7575 (__v4df) __B,
7576 (__v4df) __W,
7577 (__mmask8) __U);
7578}
7579
7580extern __inline __m256d
7581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7583 __m256d __B)
7584{
7585 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7586 (__v4df) __B,
7587 (__v4df) __W,
7588 (__mmask8) __U);
7589}
7590
7591extern __inline __m256d
7592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7593_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7594{
7595 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7596 (__v4df) __B,
7597 (__v4df)
7598 _mm256_setzero_pd (),
7599 (__mmask8) __U);
7600}
7601
7602extern __inline __m256
7603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7604_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7605{
7606 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7607 (__v8sf) __B,
7608 (__v8sf) __W,
7609 (__mmask8) __U);
7610}
7611
7612extern __inline __m256d
7613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7614_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7615{
7616 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7617 (__v4df) __B,
7618 (__v4df)
7619 _mm256_setzero_pd (),
7620 (__mmask8) __U);
7621}
7622
7623extern __inline __m256
7624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7625_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7626{
7627 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7628 (__v8sf) __B,
7629 (__v8sf) __W,
7630 (__mmask8) __U);
7631}
7632
7633extern __inline __m256
7634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7635_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7636{
7637 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7638 (__v8sf) __B,
7639 (__v8sf)
7640 _mm256_setzero_ps (),
7641 (__mmask8) __U);
7642}
7643
7644extern __inline __m256
7645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7646_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7647{
7648 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7649 (__v8sf) __B,
7650 (__v8sf)
7651 _mm256_setzero_ps (),
7652 (__mmask8) __U);
7653}
7654
7655extern __inline __m128
7656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7657_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7658{
7659 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7660 (__v4sf) __B,
7661 (__v4sf) __W,
7662 (__mmask8) __U);
7663}
7664
7665extern __inline __m128
7666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7667_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7668{
7669 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7670 (__v4sf) __B,
7671 (__v4sf) __W,
7672 (__mmask8) __U);
7673}
7674
7675extern __inline __m128
7676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7677_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7678{
7679 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7680 (__v4sf) __B,
7681 (__v4sf)
7682 _mm_setzero_ps (),
7683 (__mmask8) __U);
7684}
7685
7686extern __inline __m128
7687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7688_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7689{
7690 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7691 (__v4sf) __B,
7692 (__v4sf)
7693 _mm_setzero_ps (),
7694 (__mmask8) __U);
7695}
7696
7697extern __inline __m128
7698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7699_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7700{
7701 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7702 (__v4sf) __B,
7703 (__v4sf) __W,
7704 (__mmask8) __U);
7705}
7706
7707extern __inline __m128
7708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7709_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7710{
7711 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7712 (__v4sf) __B,
7713 (__v4sf)
7714 _mm_setzero_ps (),
7715 (__mmask8) __U);
7716}
7717
7718extern __inline __m128d
7719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7720_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7721{
7722 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7723 (__v2df) __B,
7724 (__v2df) __W,
7725 (__mmask8) __U);
7726}
7727
7728extern __inline __m128d
7729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7731{
7732 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7733 (__v2df) __B,
7734 (__v2df)
7735 _mm_setzero_pd (),
7736 (__mmask8) __U);
7737}
7738
7739extern __inline __m128d
7740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7742{
7743 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7744 (__v2df) __B,
7745 (__v2df) __W,
7746 (__mmask8) __U);
7747}
7748
7749extern __inline __m128d
7750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7751_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7752{
7753 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7754 (__v2df) __B,
7755 (__v2df)
7756 _mm_setzero_pd (),
7757 (__mmask8) __U);
7758}
7759
7760extern __inline __m128d
7761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7762_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7763{
7764 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7765 (__v2df) __B,
7766 (__v2df) __W,
7767 (__mmask8) __U);
7768}
7769
7770extern __inline __m128d
7771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7772_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7773{
7774 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7775 (__v2df) __B,
7776 (__v2df)
7777 _mm_setzero_pd (),
7778 (__mmask8) __U);
7779}
7780
7781extern __inline __m256
7782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7783_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7784{
7785 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7786 (__v8sf) __B,
7787 (__v8sf) __W,
7788 (__mmask8) __U);
7789}
7790
7791extern __inline __m256
7792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7793_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7794{
7795 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7796 (__v8sf) __B,
7797 (__v8sf)
7798 _mm256_setzero_ps (),
7799 (__mmask8) __U);
7800}
7801
7802extern __inline __m256d
7803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7804_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7805 __m256d __B)
7806{
7807 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7808 (__v4df) __B,
7809 (__v4df) __W,
7810 (__mmask8) __U);
7811}
7812
7813extern __inline __m256d
7814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7815_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7816{
7817 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7818 (__v4df) __B,
7819 (__v4df)
7820 _mm256_setzero_pd (),
7821 (__mmask8) __U);
7822}
7823
7824extern __inline __m256i
7825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7826_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7827{
7828 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7829 (__v4di) __B,
7830 (__v4di)
7831 _mm256_setzero_si256 (),
7832 __M);
7833}
7834
7835extern __inline __m256i
7836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7838 __m256i __B)
7839{
7840 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7841 (__v4di) __B,
7842 (__v4di) __W, __M);
7843}
7844
7845extern __inline __m256i
7846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7847_mm256_min_epi64 (__m256i __A, __m256i __B)
7848{
7849 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7850 (__v4di) __B,
7851 (__v4di)
7852 _mm256_setzero_si256 (),
7853 (__mmask8) -1);
7854}
7855
7856extern __inline __m256i
7857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7858_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7859 __m256i __B)
7860{
7861 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7862 (__v4di) __B,
7863 (__v4di) __W, __M);
7864}
7865
7866extern __inline __m256i
7867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7868_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7869{
7870 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7871 (__v4di) __B,
7872 (__v4di)
7873 _mm256_setzero_si256 (),
7874 __M);
7875}
7876
7877extern __inline __m256i
7878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7879_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7880{
7881 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7882 (__v4di) __B,
7883 (__v4di)
7884 _mm256_setzero_si256 (),
7885 __M);
7886}
7887
7888extern __inline __m256i
7889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890_mm256_max_epi64 (__m256i __A, __m256i __B)
7891{
7892 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7893 (__v4di) __B,
7894 (__v4di)
7895 _mm256_setzero_si256 (),
7896 (__mmask8) -1);
7897}
7898
7899extern __inline __m256i
7900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7901_mm256_max_epu64 (__m256i __A, __m256i __B)
7902{
7903 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7904 (__v4di) __B,
7905 (__v4di)
7906 _mm256_setzero_si256 (),
7907 (__mmask8) -1);
7908}
7909
7910extern __inline __m256i
7911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7912_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7913 __m256i __B)
7914{
7915 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7916 (__v4di) __B,
7917 (__v4di) __W, __M);
7918}
7919
7920extern __inline __m256i
7921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7922_mm256_min_epu64 (__m256i __A, __m256i __B)
7923{
7924 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7925 (__v4di) __B,
7926 (__v4di)
7927 _mm256_setzero_si256 (),
7928 (__mmask8) -1);
7929}
7930
7931extern __inline __m256i
7932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7933_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7934 __m256i __B)
7935{
7936 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7937 (__v4di) __B,
7938 (__v4di) __W, __M);
7939}
7940
7941extern __inline __m256i
7942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7943_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7944{
7945 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7946 (__v4di) __B,
7947 (__v4di)
7948 _mm256_setzero_si256 (),
7949 __M);
7950}
7951
7952extern __inline __m256i
7953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7954_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7955{
7956 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7957 (__v8si) __B,
7958 (__v8si)
7959 _mm256_setzero_si256 (),
7960 __M);
7961}
7962
7963extern __inline __m256i
7964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7965_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7966 __m256i __B)
7967{
7968 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7969 (__v8si) __B,
7970 (__v8si) __W, __M);
7971}
7972
7973extern __inline __m256i
7974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7975_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7976{
7977 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7978 (__v8si) __B,
7979 (__v8si)
7980 _mm256_setzero_si256 (),
7981 __M);
7982}
7983
7984extern __inline __m256i
7985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7986_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7987 __m256i __B)
7988{
7989 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7990 (__v8si) __B,
7991 (__v8si) __W, __M);
7992}
7993
7994extern __inline __m256i
7995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7996_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7997{
7998 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7999 (__v8si) __B,
8000 (__v8si)
8001 _mm256_setzero_si256 (),
8002 __M);
8003}
8004
8005extern __inline __m256i
8006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8007_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8008 __m256i __B)
8009{
8010 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
8011 (__v8si) __B,
8012 (__v8si) __W, __M);
8013}
8014
8015extern __inline __m256i
8016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8017_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
8018{
8019 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8020 (__v8si) __B,
8021 (__v8si)
8022 _mm256_setzero_si256 (),
8023 __M);
8024}
8025
8026extern __inline __m256i
8027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8028_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8029 __m256i __B)
8030{
8031 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8032 (__v8si) __B,
8033 (__v8si) __W, __M);
8034}
8035
8036extern __inline __m128i
8037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8038_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8039{
8040 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8041 (__v2di) __B,
8042 (__v2di)
8043 _mm_setzero_si128 (),
8044 __M);
8045}
8046
8047extern __inline __m128i
8048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8049_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8050 __m128i __B)
8051{
8052 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8053 (__v2di) __B,
8054 (__v2di) __W, __M);
8055}
8056
8057extern __inline __m128i
8058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8059_mm_min_epi64 (__m128i __A, __m128i __B)
8060{
8061 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8062 (__v2di) __B,
8063 (__v2di)
a25a7887 8064 _mm_setzero_si128 (),
936c0fe4
AI
8065 (__mmask8) -1);
8066}
8067
8068extern __inline __m128i
8069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8070_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8071 __m128i __B)
8072{
8073 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8074 (__v2di) __B,
8075 (__v2di) __W, __M);
8076}
8077
8078extern __inline __m128i
8079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8080_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8081{
8082 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8083 (__v2di) __B,
8084 (__v2di)
8085 _mm_setzero_si128 (),
8086 __M);
8087}
8088
8089extern __inline __m128i
8090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8091_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8092{
8093 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8094 (__v2di) __B,
8095 (__v2di)
8096 _mm_setzero_si128 (),
8097 __M);
8098}
8099
8100extern __inline __m128i
8101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102_mm_max_epi64 (__m128i __A, __m128i __B)
8103{
8104 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8105 (__v2di) __B,
8106 (__v2di)
a25a7887 8107 _mm_setzero_si128 (),
936c0fe4
AI
8108 (__mmask8) -1);
8109}
8110
8111extern __inline __m128i
8112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8113_mm_max_epu64 (__m128i __A, __m128i __B)
8114{
8115 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8116 (__v2di) __B,
8117 (__v2di)
a25a7887 8118 _mm_setzero_si128 (),
936c0fe4
AI
8119 (__mmask8) -1);
8120}
8121
8122extern __inline __m128i
8123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8124_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8125 __m128i __B)
8126{
8127 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8128 (__v2di) __B,
8129 (__v2di) __W, __M);
8130}
8131
8132extern __inline __m128i
8133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8134_mm_min_epu64 (__m128i __A, __m128i __B)
8135{
8136 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8137 (__v2di) __B,
8138 (__v2di)
a25a7887 8139 _mm_setzero_si128 (),
936c0fe4
AI
8140 (__mmask8) -1);
8141}
8142
8143extern __inline __m128i
8144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8145_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8146 __m128i __B)
8147{
8148 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8149 (__v2di) __B,
8150 (__v2di) __W, __M);
8151}
8152
8153extern __inline __m128i
8154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8156{
8157 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8158 (__v2di) __B,
8159 (__v2di)
8160 _mm_setzero_si128 (),
8161 __M);
8162}
8163
8164extern __inline __m128i
8165__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8166_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8167{
8168 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8169 (__v4si) __B,
8170 (__v4si)
8171 _mm_setzero_si128 (),
8172 __M);
8173}
8174
8175extern __inline __m128i
8176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8177_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8178 __m128i __B)
8179{
8180 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8181 (__v4si) __B,
8182 (__v4si) __W, __M);
8183}
8184
8185extern __inline __m128i
8186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8187_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8188{
8189 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8190 (__v4si) __B,
8191 (__v4si)
8192 _mm_setzero_si128 (),
8193 __M);
8194}
8195
8196extern __inline __m128i
8197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8198_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8199 __m128i __B)
8200{
8201 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8202 (__v4si) __B,
8203 (__v4si) __W, __M);
8204}
8205
8206extern __inline __m128i
8207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8208_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8209{
8210 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8211 (__v4si) __B,
8212 (__v4si)
8213 _mm_setzero_si128 (),
8214 __M);
8215}
8216
8217extern __inline __m128i
8218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8219_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8220 __m128i __B)
8221{
8222 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8223 (__v4si) __B,
8224 (__v4si) __W, __M);
8225}
8226
8227extern __inline __m128i
8228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8229_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8230{
8231 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8232 (__v4si) __B,
8233 (__v4si)
8234 _mm_setzero_si128 (),
8235 __M);
8236}
8237
8238extern __inline __m128i
8239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8240_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8241 __m128i __B)
8242{
8243 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8244 (__v4si) __B,
8245 (__v4si) __W, __M);
8246}
8247
8248#ifndef __AVX512CD__
8249#pragma GCC push_options
8250#pragma GCC target("avx512vl,avx512cd")
8251#define __DISABLE_AVX512VLCD__
8252#endif
8253
8254extern __inline __m128i
8255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8256_mm_broadcastmb_epi64 (__mmask8 __A)
8257{
8258 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8259}
8260
8261extern __inline __m256i
8262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8263_mm256_broadcastmb_epi64 (__mmask8 __A)
8264{
8265 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8266}
8267
8268extern __inline __m128i
8269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8270_mm_broadcastmw_epi32 (__mmask16 __A)
8271{
8272 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8273}
8274
8275extern __inline __m256i
8276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8277_mm256_broadcastmw_epi32 (__mmask16 __A)
8278{
8279 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8280}
8281
8282extern __inline __m256i
8283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8284_mm256_lzcnt_epi32 (__m256i __A)
8285{
8286 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8287 (__v8si)
8288 _mm256_setzero_si256 (),
8289 (__mmask8) -1);
8290}
8291
8292extern __inline __m256i
8293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8294_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8295{
8296 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8297 (__v8si) __W,
8298 (__mmask8) __U);
8299}
8300
8301extern __inline __m256i
8302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8303_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8304{
8305 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8306 (__v8si)
8307 _mm256_setzero_si256 (),
8308 (__mmask8) __U);
8309}
8310
8311extern __inline __m256i
8312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8313_mm256_lzcnt_epi64 (__m256i __A)
8314{
8315 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8316 (__v4di)
8317 _mm256_setzero_si256 (),
8318 (__mmask8) -1);
8319}
8320
8321extern __inline __m256i
8322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8323_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8324{
8325 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8326 (__v4di) __W,
8327 (__mmask8) __U);
8328}
8329
8330extern __inline __m256i
8331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8332_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8333{
8334 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8335 (__v4di)
8336 _mm256_setzero_si256 (),
8337 (__mmask8) __U);
8338}
8339
8340extern __inline __m256i
8341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8342_mm256_conflict_epi64 (__m256i __A)
8343{
8344 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8345 (__v4di)
8346 _mm256_setzero_si256 (),
c42b0bdf 8347 (__mmask8) -1);
936c0fe4
AI
8348}
8349
8350extern __inline __m256i
8351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8352_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8353{
8354 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8355 (__v4di) __W,
8356 (__mmask8)
8357 __U);
8358}
8359
8360extern __inline __m256i
8361__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8362_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8363{
8364 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8365 (__v4di)
8366 _mm256_setzero_si256 (),
8367 (__mmask8)
8368 __U);
8369}
8370
8371extern __inline __m256i
8372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8373_mm256_conflict_epi32 (__m256i __A)
8374{
8375 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8376 (__v8si)
8377 _mm256_setzero_si256 (),
c42b0bdf 8378 (__mmask8) -1);
936c0fe4
AI
8379}
8380
8381extern __inline __m256i
8382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8383_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8384{
8385 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8386 (__v8si) __W,
8387 (__mmask8)
8388 __U);
8389}
8390
8391extern __inline __m256i
8392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8393_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8394{
8395 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8396 (__v8si)
8397 _mm256_setzero_si256 (),
8398 (__mmask8)
8399 __U);
8400}
8401
8402extern __inline __m128i
8403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8404_mm_lzcnt_epi32 (__m128i __A)
8405{
8406 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8407 (__v4si)
8408 _mm_setzero_si128 (),
8409 (__mmask8) -1);
8410}
8411
8412extern __inline __m128i
8413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8414_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8415{
8416 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8417 (__v4si) __W,
8418 (__mmask8) __U);
8419}
8420
8421extern __inline __m128i
8422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8423_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8424{
8425 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8426 (__v4si)
8427 _mm_setzero_si128 (),
8428 (__mmask8) __U);
8429}
8430
8431extern __inline __m128i
8432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8433_mm_lzcnt_epi64 (__m128i __A)
8434{
8435 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8436 (__v2di)
a25a7887 8437 _mm_setzero_si128 (),
936c0fe4
AI
8438 (__mmask8) -1);
8439}
8440
8441extern __inline __m128i
8442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8443_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8444{
8445 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8446 (__v2di) __W,
8447 (__mmask8) __U);
8448}
8449
8450extern __inline __m128i
8451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8452_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8453{
8454 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8455 (__v2di)
a25a7887 8456 _mm_setzero_si128 (),
936c0fe4
AI
8457 (__mmask8) __U);
8458}
8459
8460extern __inline __m128i
8461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8462_mm_conflict_epi64 (__m128i __A)
8463{
8464 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8465 (__v2di)
a25a7887 8466 _mm_setzero_si128 (),
c42b0bdf 8467 (__mmask8) -1);
936c0fe4
AI
8468}
8469
8470extern __inline __m128i
8471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8472_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8473{
8474 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8475 (__v2di) __W,
8476 (__mmask8)
8477 __U);
8478}
8479
8480extern __inline __m128i
8481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8482_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8483{
8484 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8485 (__v2di)
a25a7887 8486 _mm_setzero_si128 (),
936c0fe4
AI
8487 (__mmask8)
8488 __U);
8489}
8490
8491extern __inline __m128i
8492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8493_mm_conflict_epi32 (__m128i __A)
8494{
8495 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8496 (__v4si)
8497 _mm_setzero_si128 (),
c42b0bdf 8498 (__mmask8) -1);
936c0fe4
AI
8499}
8500
8501extern __inline __m128i
8502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8503_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8504{
8505 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8506 (__v4si) __W,
8507 (__mmask8)
8508 __U);
8509}
8510
8511extern __inline __m128i
8512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8513_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8514{
8515 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8516 (__v4si)
8517 _mm_setzero_si128 (),
8518 (__mmask8)
8519 __U);
8520}
8521
/* Close the "avx512vl,avx512cd" target region opened by the matching
   #ifndef __AVX512CD__ block, and drop the helper macro so it does not
   stay defined in code that includes this header.  */
#ifdef __DISABLE_AVX512VLCD__
#undef __DISABLE_AVX512VLCD__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
8525
8526extern __inline __m256d
8527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8528_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8529 __m256d __B)
8530{
8531 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8532 (__v4df) __B,
8533 (__v4df) __W,
8534 (__mmask8) __U);
8535}
8536
8537extern __inline __m256d
8538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8539_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8540{
8541 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8542 (__v4df) __B,
8543 (__v4df)
8544 _mm256_setzero_pd (),
8545 (__mmask8) __U);
8546}
8547
8548extern __inline __m128d
8549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8550_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8551 __m128d __B)
8552{
8553 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8554 (__v2df) __B,
8555 (__v2df) __W,
8556 (__mmask8) __U);
8557}
8558
8559extern __inline __m128d
8560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8561_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8562{
8563 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8564 (__v2df) __B,
8565 (__v2df)
8566 _mm_setzero_pd (),
8567 (__mmask8) __U);
8568}
8569
8570extern __inline __m256
8571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8572_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8573 __m256 __B)
8574{
8575 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8576 (__v8sf) __B,
8577 (__v8sf) __W,
8578 (__mmask8) __U);
8579}
8580
8581extern __inline __m256d
8582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8583_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8584 __m256d __B)
8585{
8586 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8587 (__v4df) __B,
8588 (__v4df) __W,
8589 (__mmask8) __U);
8590}
8591
8592extern __inline __m256d
8593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8594_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8595{
8596 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8597 (__v4df) __B,
8598 (__v4df)
8599 _mm256_setzero_pd (),
8600 (__mmask8) __U);
8601}
8602
8603extern __inline __m128d
8604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8605_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8606 __m128d __B)
8607{
8608 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8609 (__v2df) __B,
8610 (__v2df) __W,
8611 (__mmask8) __U);
8612}
8613
8614extern __inline __m128d
8615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8616_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8617{
8618 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8619 (__v2df) __B,
8620 (__v2df)
8621 _mm_setzero_pd (),
8622 (__mmask8) __U);
8623}
8624
8625extern __inline __m256
8626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8627_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8628 __m256 __B)
8629{
8630 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8631 (__v8sf) __B,
8632 (__v8sf) __W,
8633 (__mmask8) __U);
8634}
8635
8636extern __inline __m256
8637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8639{
8640 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8641 (__v8sf) __B,
8642 (__v8sf)
8643 _mm256_setzero_ps (),
8644 (__mmask8) __U);
8645}
8646
8647extern __inline __m128
8648__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8649_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8650{
8651 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8652 (__v4sf) __B,
8653 (__v4sf) __W,
8654 (__mmask8) __U);
8655}
8656
8657extern __inline __m128
8658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8660{
8661 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8662 (__v4sf) __B,
8663 (__v4sf)
8664 _mm_setzero_ps (),
8665 (__mmask8) __U);
8666}
8667
8668extern __inline __m128
8669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8671{
8672 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8673 (__v4sf) __W,
8674 (__mmask8) __U);
8675}
8676
8677extern __inline __m128
8678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8679_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8680{
8681 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8682 (__v4sf)
8683 _mm_setzero_ps (),
8684 (__mmask8) __U);
8685}
8686
8687extern __inline __m256
8688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8689_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8690{
8691 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8692 (__v8sf) __B,
8693 (__v8sf)
8694 _mm256_setzero_ps (),
8695 (__mmask8) __U);
8696}
8697
8698extern __inline __m256
8699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8700_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8701{
8702 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8703 (__v8sf) __W,
8704 (__mmask8) __U);
8705}
8706
8707extern __inline __m256
8708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8709_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8710{
8711 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8712 (__v8sf)
8713 _mm256_setzero_ps (),
8714 (__mmask8) __U);
8715}
8716
8717extern __inline __m128
8718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8719_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8720{
8721 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8722 (__v4sf) __B,
8723 (__v4sf) __W,
8724 (__mmask8) __U);
8725}
8726
8727extern __inline __m128
8728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8729_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8730{
8731 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8732 (__v4sf) __B,
8733 (__v4sf)
8734 _mm_setzero_ps (),
8735 (__mmask8) __U);
8736}
8737
8738extern __inline __m256i
8739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8740_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8741 __m128i __B)
8742{
8743 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8744 (__v4si) __B,
8745 (__v8si) __W,
8746 (__mmask8) __U);
8747}
8748
8749extern __inline __m256i
8750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8751_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8752{
8753 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8754 (__v4si) __B,
8755 (__v8si)
8756 _mm256_setzero_si256 (),
8757 (__mmask8) __U);
8758}
8759
8760extern __inline __m128i
8761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8762_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8763 __m128i __B)
8764{
8765 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8766 (__v4si) __B,
8767 (__v4si) __W,
8768 (__mmask8) __U);
8769}
8770
8771extern __inline __m128i
8772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8773_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8774{
8775 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8776 (__v4si) __B,
8777 (__v4si)
8778 _mm_setzero_si128 (),
8779 (__mmask8) __U);
8780}
8781
8782extern __inline __m256i
8783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8784_mm256_sra_epi64 (__m256i __A, __m128i __B)
8785{
8786 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8787 (__v2di) __B,
8788 (__v4di)
8789 _mm256_setzero_si256 (),
8790 (__mmask8) -1);
8791}
8792
8793extern __inline __m256i
8794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8795_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8796 __m128i __B)
8797{
8798 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8799 (__v2di) __B,
8800 (__v4di) __W,
8801 (__mmask8) __U);
8802}
8803
8804extern __inline __m256i
8805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8806_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8807{
8808 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8809 (__v2di) __B,
8810 (__v4di)
8811 _mm256_setzero_si256 (),
8812 (__mmask8) __U);
8813}
8814
8815extern __inline __m128i
8816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8817_mm_sra_epi64 (__m128i __A, __m128i __B)
8818{
8819 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8820 (__v2di) __B,
8821 (__v2di)
a25a7887 8822 _mm_setzero_si128 (),
936c0fe4
AI
8823 (__mmask8) -1);
8824}
8825
8826extern __inline __m128i
8827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8828_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8829 __m128i __B)
8830{
8831 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8832 (__v2di) __B,
8833 (__v2di) __W,
8834 (__mmask8) __U);
8835}
8836
8837extern __inline __m128i
8838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8839_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8840{
8841 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8842 (__v2di) __B,
8843 (__v2di)
a25a7887 8844 _mm_setzero_si128 (),
936c0fe4
AI
8845 (__mmask8) __U);
8846}
8847
8848extern __inline __m128i
8849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8850_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8851 __m128i __B)
8852{
8853 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8854 (__v4si) __B,
8855 (__v4si) __W,
8856 (__mmask8) __U);
8857}
8858
8859extern __inline __m128i
8860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8861_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8862{
8863 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8864 (__v4si) __B,
8865 (__v4si)
8866 _mm_setzero_si128 (),
8867 (__mmask8) __U);
8868}
8869
8870extern __inline __m128i
8871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8872_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8873 __m128i __B)
8874{
8875 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8876 (__v2di) __B,
8877 (__v2di) __W,
8878 (__mmask8) __U);
8879}
8880
8881extern __inline __m128i
8882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8883_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8884{
8885 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8886 (__v2di) __B,
8887 (__v2di)
a25a7887 8888 _mm_setzero_si128 (),
936c0fe4
AI
8889 (__mmask8) __U);
8890}
8891
8892extern __inline __m256i
8893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8894_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8895 __m128i __B)
8896{
8897 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8898 (__v4si) __B,
8899 (__v8si) __W,
8900 (__mmask8) __U);
8901}
8902
8903extern __inline __m256i
8904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8905_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8906{
8907 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8908 (__v4si) __B,
8909 (__v8si)
8910 _mm256_setzero_si256 (),
8911 (__mmask8) __U);
8912}
8913
8914extern __inline __m256i
8915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8916_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8917 __m128i __B)
8918{
8919 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8920 (__v2di) __B,
8921 (__v4di) __W,
8922 (__mmask8) __U);
8923}
8924
8925extern __inline __m256i
8926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8927_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8928{
8929 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8930 (__v2di) __B,
8931 (__v4di)
8932 _mm256_setzero_si256 (),
8933 (__mmask8) __U);
8934}
8935
8936extern __inline __m256
8937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8938_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8939 __m256 __Y)
8940{
8941 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8942 (__v8si) __X,
8943 (__v8sf) __W,
8944 (__mmask8) __U);
8945}
8946
8947extern __inline __m256
8948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8949_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8950{
8951 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8952 (__v8si) __X,
8953 (__v8sf)
8954 _mm256_setzero_ps (),
8955 (__mmask8) __U);
8956}
8957
8958extern __inline __m256d
8959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8960_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8961{
8962 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8963 (__v4di) __X,
8964 (__v4df)
8965 _mm256_setzero_pd (),
8966 (__mmask8) -1);
8967}
8968
8969extern __inline __m256d
8970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8971_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8972 __m256d __Y)
8973{
8974 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8975 (__v4di) __X,
8976 (__v4df) __W,
8977 (__mmask8) __U);
8978}
8979
8980extern __inline __m256d
8981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8982_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8983{
8984 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8985 (__v4di) __X,
8986 (__v4df)
8987 _mm256_setzero_pd (),
8988 (__mmask8) __U);
8989}
8990
8991extern __inline __m256d
8992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8993_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8994 __m256i __C)
8995{
8996 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8997 (__v4di) __C,
8998 (__v4df) __W,
8999 (__mmask8)
9000 __U);
9001}
9002
9003extern __inline __m256d
9004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9005_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
9006{
9007 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
9008 (__v4di) __C,
9009 (__v4df)
9010 _mm256_setzero_pd (),
9011 (__mmask8)
9012 __U);
9013}
9014
9015extern __inline __m256
9016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9017_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
9018 __m256i __C)
9019{
9020 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9021 (__v8si) __C,
9022 (__v8sf) __W,
9023 (__mmask8) __U);
9024}
9025
9026extern __inline __m256
9027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9028_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
9029{
9030 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9031 (__v8si) __C,
9032 (__v8sf)
9033 _mm256_setzero_ps (),
9034 (__mmask8) __U);
9035}
9036
9037extern __inline __m128d
9038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9039_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
9040 __m128i __C)
9041{
9042 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9043 (__v2di) __C,
9044 (__v2df) __W,
9045 (__mmask8) __U);
9046}
9047
9048extern __inline __m128d
9049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9050_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
9051{
9052 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9053 (__v2di) __C,
9054 (__v2df)
9055 _mm_setzero_pd (),
9056 (__mmask8) __U);
9057}
9058
9059extern __inline __m128
9060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9061_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
9062 __m128i __C)
9063{
9064 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9065 (__v4si) __C,
9066 (__v4sf) __W,
9067 (__mmask8) __U);
9068}
9069
9070extern __inline __m128
9071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9072_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
9073{
9074 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9075 (__v4si) __C,
9076 (__v4sf)
9077 _mm_setzero_ps (),
9078 (__mmask8) __U);
9079}
9080
9081extern __inline __m256i
9082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9083_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9084{
9085 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9086 (__v8si) __B,
9087 (__v8si)
9088 _mm256_setzero_si256 (),
9089 __M);
9090}
9091
9092extern __inline __m256i
9093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9094_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9095{
9096 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9097 (__v4di) __X,
9098 (__v4di)
9099 _mm256_setzero_si256 (),
9100 __M);
9101}
9102
9103extern __inline __m256i
9104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9105_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9106 __m256i __B)
9107{
9108 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9109 (__v8si) __B,
9110 (__v8si) __W, __M);
9111}
9112
9113extern __inline __m128i
9114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9115_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9116{
9117 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9118 (__v4si) __B,
9119 (__v4si)
9120 _mm_setzero_si128 (),
9121 __M);
9122}
9123
9124extern __inline __m128i
9125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
20e363e4 9126_mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
936c0fe4
AI
9127 __m128i __B)
9128{
9129 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9130 (__v4si) __B,
9131 (__v4si) __W, __M);
9132}
9133
9134extern __inline __m256i
9135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9136_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9137 __m256i __Y)
9138{
9139 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9140 (__v8si) __Y,
9141 (__v4di) __W, __M);
9142}
9143
9144extern __inline __m256i
9145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9146_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9147{
9148 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9149 (__v8si) __Y,
9150 (__v4di)
9151 _mm256_setzero_si256 (),
9152 __M);
9153}
9154
9155extern __inline __m128i
9156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9157_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9158 __m128i __Y)
9159{
9160 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9161 (__v4si) __Y,
9162 (__v2di) __W, __M);
9163}
9164
9165extern __inline __m128i
9166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9167_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9168{
9169 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9170 (__v4si) __Y,
9171 (__v2di)
9172 _mm_setzero_si128 (),
9173 __M);
9174}
9175
395a191d
SP
9176extern __inline __m256i
9177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9178_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
9179{
9180 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9181 (__v4di) __X,
9182 (__v4di)
9183 _mm256_setzero_si256 (),
9184 (__mmask8) -1);
9185}
9186
936c0fe4
AI
9187extern __inline __m256i
9188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9189_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9190 __m256i __Y)
9191{
9192 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9193 (__v4di) __X,
9194 (__v4di) __W,
9195 __M);
9196}
9197
9198extern __inline __m256i
9199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9200_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9201 __m256i __Y)
9202{
9203 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9204 (__v8si) __Y,
9205 (__v4di) __W, __M);
9206}
9207
9208extern __inline __m256i
9209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9210_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9211{
9212 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9213 (__v8si) __X,
9214 (__v8si)
9215 _mm256_setzero_si256 (),
9216 __M);
9217}
9218
9219extern __inline __m256i
9220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9221_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9222{
9223 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9224 (__v8si) __Y,
9225 (__v4di)
9226 _mm256_setzero_si256 (),
9227 __M);
9228}
9229
9230extern __inline __m128i
9231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9232_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9233 __m128i __Y)
9234{
9235 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9236 (__v4si) __Y,
9237 (__v2di) __W, __M);
9238}
9239
9240extern __inline __m128i
9241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9242_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9243{
9244 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9245 (__v4si) __Y,
9246 (__v2di)
9247 _mm_setzero_si128 (),
9248 __M);
9249}
9250
395a191d
SP
9251extern __inline __m256i
9252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9253_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
9254{
9255 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9256 (__v8si) __X,
9257 (__v8si)
9258 _mm256_setzero_si256 (),
9259 (__mmask8) -1);
9260}
9261
936c0fe4
AI
9262extern __inline __m256i
9263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9264_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9265 __m256i __Y)
9266{
9267 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9268 (__v8si) __X,
9269 (__v8si) __W,
9270 __M);
9271}
9272
6b62f323
JJ
9273extern __inline __mmask8
9274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9275_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9276{
6b62f323
JJ
9277 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9278 (__v8si) __Y, 4,
936c0fe4
AI
9279 (__mmask8) __M);
9280}
9281
6b62f323
JJ
9282extern __inline __mmask8
9283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9284_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9285{
6b62f323
JJ
9286 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9287 (__v8si) __Y, 4,
9288 (__mmask8) -1);
936c0fe4
AI
9289}
9290
6b62f323
JJ
9291extern __inline __mmask8
9292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9293_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9294{
6b62f323
JJ
9295 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9296 (__v8si) __Y, 1,
9297 (__mmask8) __M);
936c0fe4
AI
9298}
9299
6b62f323
JJ
9300extern __inline __mmask8
9301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9302_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9303{
6b62f323
JJ
9304 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9305 (__v8si) __Y, 1,
9306 (__mmask8) -1);
936c0fe4
AI
9307}
9308
6b62f323
JJ
9309extern __inline __mmask8
9310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9311_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9312{
6b62f323
JJ
9313 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9314 (__v8si) __Y, 5,
9315 (__mmask8) __M);
936c0fe4
AI
9316}
9317
6b62f323
JJ
9318extern __inline __mmask8
9319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9320_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9321{
6b62f323
JJ
9322 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9323 (__v8si) __Y, 5,
9324 (__mmask8) -1);
936c0fe4
AI
9325}
9326
6b62f323
JJ
9327extern __inline __mmask8
9328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9329_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9330{
6b62f323
JJ
9331 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9332 (__v8si) __Y, 2,
9333 (__mmask8) __M);
936c0fe4
AI
9334}
9335
6b62f323
JJ
9336extern __inline __mmask8
9337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9338_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
936c0fe4 9339{
6b62f323
JJ
9340 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9341 (__v8si) __Y, 2,
9342 (__mmask8) -1);
936c0fe4
AI
9343}
9344
6b62f323
JJ
9345extern __inline __mmask8
9346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9347_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9348{
6b62f323
JJ
9349 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9350 (__v4di) __Y, 4,
9351 (__mmask8) __M);
936c0fe4
AI
9352}
9353
6b62f323
JJ
9354extern __inline __mmask8
9355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9356_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9357{
6b62f323
JJ
9358 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9359 (__v4di) __Y, 4,
9360 (__mmask8) -1);
936c0fe4
AI
9361}
9362
6b62f323
JJ
9363extern __inline __mmask8
9364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9365_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9366{
6b62f323
JJ
9367 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9368 (__v4di) __Y, 1,
9369 (__mmask8) __M);
936c0fe4
AI
9370}
9371
6b62f323
JJ
9372extern __inline __mmask8
9373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9374_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9375{
6b62f323
JJ
9376 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9377 (__v4di) __Y, 1,
9378 (__mmask8) -1);
936c0fe4
AI
9379}
9380
6b62f323
JJ
9381extern __inline __mmask8
9382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9383_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9384{
6b62f323
JJ
9385 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9386 (__v4di) __Y, 5,
9387 (__mmask8) __M);
936c0fe4
AI
9388}
9389
6b62f323
JJ
9390extern __inline __mmask8
9391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9392_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9393{
6b62f323
JJ
9394 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9395 (__v4di) __Y, 5,
9396 (__mmask8) -1);
936c0fe4
AI
9397}
9398
6b62f323
JJ
9399extern __inline __mmask8
9400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9401_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9402{
6b62f323
JJ
9403 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9404 (__v4di) __Y, 2,
9405 (__mmask8) __M);
936c0fe4
AI
9406}
9407
6b62f323
JJ
9408extern __inline __mmask8
9409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9410_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
936c0fe4 9411{
6b62f323
JJ
9412 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9413 (__v4di) __Y, 2,
9414 (__mmask8) -1);
936c0fe4
AI
9415}
9416
6b62f323
JJ
9417extern __inline __mmask8
9418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9419_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9420{
6b62f323
JJ
9421 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9422 (__v8si) __Y, 4,
9423 (__mmask8) __M);
936c0fe4
AI
9424}
9425
6b62f323
JJ
9426extern __inline __mmask8
9427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9428_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
9429{
9430 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9431 (__v8si) __Y, 4,
9432 (__mmask8) -1);
936c0fe4
AI
9433}
9434
6b62f323
JJ
9435extern __inline __mmask8
9436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9437_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9438{
6b62f323
JJ
9439 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9440 (__v8si) __Y, 1,
9441 (__mmask8) __M);
936c0fe4
AI
9442}
9443
6b62f323
JJ
9444extern __inline __mmask8
9445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9446_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9447{
6b62f323
JJ
9448 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9449 (__v8si) __Y, 1,
9450 (__mmask8) -1);
936c0fe4
AI
9451}
9452
6b62f323
JJ
9453extern __inline __mmask8
9454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9455_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9456{
6b62f323
JJ
9457 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9458 (__v8si) __Y, 5,
9459 (__mmask8) __M);
936c0fe4
AI
9460}
9461
6b62f323
JJ
9462extern __inline __mmask8
9463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9464_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9465{
6b62f323
JJ
9466 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9467 (__v8si) __Y, 5,
9468 (__mmask8) -1);
936c0fe4
AI
9469}
9470
6b62f323
JJ
9471extern __inline __mmask8
9472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9473_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9474{
6b62f323
JJ
9475 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9476 (__v8si) __Y, 2,
9477 (__mmask8) __M);
936c0fe4
AI
9478}
9479
6b62f323
JJ
9480extern __inline __mmask8
9481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9482_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
936c0fe4 9483{
6b62f323
JJ
9484 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9485 (__v8si) __Y, 2,
9486 (__mmask8) -1);
936c0fe4
AI
9487}
9488
6b62f323
JJ
9489extern __inline __mmask8
9490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9491_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9492{
6b62f323
JJ
9493 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9494 (__v4di) __Y, 4,
9495 (__mmask8) __M);
936c0fe4
AI
9496}
9497
6b62f323
JJ
9498extern __inline __mmask8
9499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9500_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9501{
6b62f323
JJ
9502 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9503 (__v4di) __Y, 4,
9504 (__mmask8) -1);
936c0fe4
AI
9505}
9506
6b62f323
JJ
9507extern __inline __mmask8
9508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9509_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9510{
6b62f323
JJ
9511 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9512 (__v4di) __Y, 1,
9513 (__mmask8) __M);
936c0fe4
AI
9514}
9515
6b62f323
JJ
9516extern __inline __mmask8
9517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9518_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9519{
6b62f323
JJ
9520 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9521 (__v4di) __Y, 1,
9522 (__mmask8) -1);
936c0fe4
AI
9523}
9524
6b62f323
JJ
9525extern __inline __mmask8
9526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9527_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9528{
6b62f323
JJ
9529 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9530 (__v4di) __Y, 5,
9531 (__mmask8) __M);
936c0fe4
AI
9532}
9533
6b62f323
JJ
9534extern __inline __mmask8
9535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9536_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9537{
6b62f323
JJ
9538 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9539 (__v4di) __Y, 5,
9540 (__mmask8) -1);
936c0fe4
AI
9541}
9542
6b62f323
JJ
9543extern __inline __mmask8
9544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9545_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
936c0fe4 9546{
6b62f323
JJ
9547 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9548 (__v4di) __Y, 2,
9549 (__mmask8) __M);
936c0fe4
AI
9550}
9551
6b62f323
JJ
9552extern __inline __mmask8
9553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9554_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
936c0fe4 9555{
6b62f323
JJ
9556 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9557 (__v4di) __Y, 2,
9558 (__mmask8) -1);
936c0fe4
AI
9559}
9560
6b62f323
JJ
9561extern __inline __mmask8
9562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9563_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9564{
6b62f323
JJ
9565 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9566 (__v4si) __Y, 4,
9567 (__mmask8) __M);
936c0fe4
AI
9568}
9569
6b62f323
JJ
9570extern __inline __mmask8
9571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9572_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9573{
6b62f323
JJ
9574 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9575 (__v4si) __Y, 4,
9576 (__mmask8) -1);
936c0fe4
AI
9577}
9578
6b62f323
JJ
9579extern __inline __mmask8
9580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9581_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9582{
6b62f323
JJ
9583 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9584 (__v4si) __Y, 1,
9585 (__mmask8) __M);
936c0fe4
AI
9586}
9587
6b62f323
JJ
9588extern __inline __mmask8
9589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9590_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9591{
6b62f323
JJ
9592 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9593 (__v4si) __Y, 1,
9594 (__mmask8) -1);
936c0fe4
AI
9595}
9596
6b62f323
JJ
9597extern __inline __mmask8
9598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9599_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9600{
6b62f323
JJ
9601 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9602 (__v4si) __Y, 5,
9603 (__mmask8) __M);
936c0fe4
AI
9604}
9605
6b62f323
JJ
9606extern __inline __mmask8
9607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9608_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9609{
6b62f323
JJ
9610 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9611 (__v4si) __Y, 5,
9612 (__mmask8) -1);
936c0fe4
AI
9613}
9614
6b62f323
JJ
9615extern __inline __mmask8
9616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9617_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9618{
6b62f323
JJ
9619 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9620 (__v4si) __Y, 2,
9621 (__mmask8) __M);
936c0fe4
AI
9622}
9623
6b62f323
JJ
9624extern __inline __mmask8
9625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9626_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
936c0fe4 9627{
6b62f323
JJ
9628 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9629 (__v4si) __Y, 2,
9630 (__mmask8) -1);
936c0fe4
AI
9631}
9632
6b62f323
JJ
9633extern __inline __mmask8
9634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9635_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9636{
6b62f323
JJ
9637 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9638 (__v2di) __Y, 4,
9639 (__mmask8) __M);
936c0fe4
AI
9640}
9641
6b62f323
JJ
9642extern __inline __mmask8
9643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9644_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9645{
6b62f323
JJ
9646 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9647 (__v2di) __Y, 4,
9648 (__mmask8) -1);
936c0fe4
AI
9649}
9650
6b62f323
JJ
9651extern __inline __mmask8
9652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9653_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9654{
6b62f323
JJ
9655 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9656 (__v2di) __Y, 1,
9657 (__mmask8) __M);
936c0fe4
AI
9658}
9659
6b62f323
JJ
9660extern __inline __mmask8
9661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9662_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9663{
6b62f323
JJ
9664 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9665 (__v2di) __Y, 1,
9666 (__mmask8) -1);
936c0fe4
AI
9667}
9668
6b62f323
JJ
9669extern __inline __mmask8
9670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9671_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9672{
6b62f323
JJ
9673 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9674 (__v2di) __Y, 5,
9675 (__mmask8) __M);
936c0fe4
AI
9676}
9677
6b62f323
JJ
9678extern __inline __mmask8
9679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9680_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9681{
6b62f323
JJ
9682 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9683 (__v2di) __Y, 5,
9684 (__mmask8) -1);
936c0fe4
AI
9685}
9686
6b62f323
JJ
9687extern __inline __mmask8
9688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9689_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9690{
6b62f323
JJ
9691 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9692 (__v2di) __Y, 2,
9693 (__mmask8) __M);
936c0fe4
AI
9694}
9695
6b62f323
JJ
9696extern __inline __mmask8
9697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9698_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
936c0fe4 9699{
6b62f323
JJ
9700 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9701 (__v2di) __Y, 2,
9702 (__mmask8) -1);
936c0fe4
AI
9703}
9704
6b62f323
JJ
9705extern __inline __mmask8
9706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9707_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9708{
6b62f323
JJ
9709 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9710 (__v4si) __Y, 4,
9711 (__mmask8) __M);
936c0fe4
AI
9712}
9713
6b62f323
JJ
9714extern __inline __mmask8
9715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9716_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
9717{
9718 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9719 (__v4si) __Y, 4,
9720 (__mmask8) -1);
9721}
9722
9723extern __inline __mmask8
9724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9725_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9726{
9727 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9728 (__v4si) __Y, 1,
9729 (__mmask8) __M);
9730}
9731
9732extern __inline __mmask8
9733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9734_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9735{
6b62f323
JJ
9736 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9737 (__v4si) __Y, 1,
9738 (__mmask8) -1);
936c0fe4
AI
9739}
9740
6b62f323
JJ
9741extern __inline __mmask8
9742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9743_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9744{
6b62f323
JJ
9745 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9746 (__v4si) __Y, 5,
9747 (__mmask8) __M);
936c0fe4
AI
9748}
9749
6b62f323
JJ
9750extern __inline __mmask8
9751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9752_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9753{
6b62f323
JJ
9754 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9755 (__v4si) __Y, 5,
9756 (__mmask8) -1);
936c0fe4
AI
9757}
9758
6b62f323
JJ
9759extern __inline __mmask8
9760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9761_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9762{
6b62f323
JJ
9763 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9764 (__v4si) __Y, 2,
9765 (__mmask8) __M);
936c0fe4
AI
9766}
9767
6b62f323
JJ
9768extern __inline __mmask8
9769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9770_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
936c0fe4 9771{
6b62f323
JJ
9772 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9773 (__v4si) __Y, 2,
9774 (__mmask8) -1);
936c0fe4
AI
9775}
9776
6b62f323
JJ
9777extern __inline __mmask8
9778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9779_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9780{
6b62f323
JJ
9781 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9782 (__v2di) __Y, 4,
9783 (__mmask8) __M);
936c0fe4
AI
9784}
9785
6b62f323
JJ
9786extern __inline __mmask8
9787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9788_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9789{
6b62f323
JJ
9790 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9791 (__v2di) __Y, 4,
9792 (__mmask8) -1);
936c0fe4
AI
9793}
9794
6b62f323
JJ
9795extern __inline __mmask8
9796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9797_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9798{
6b62f323
JJ
9799 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9800 (__v2di) __Y, 1,
9801 (__mmask8) __M);
936c0fe4
AI
9802}
9803
6b62f323
JJ
9804extern __inline __mmask8
9805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9806_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9807{
6b62f323
JJ
9808 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9809 (__v2di) __Y, 1,
9810 (__mmask8) -1);
936c0fe4
AI
9811}
9812
6b62f323
JJ
9813extern __inline __mmask8
9814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9815_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9816{
6b62f323
JJ
9817 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9818 (__v2di) __Y, 5,
9819 (__mmask8) __M);
936c0fe4
AI
9820}
9821
6b62f323
JJ
9822extern __inline __mmask8
9823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9825{
6b62f323
JJ
9826 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9827 (__v2di) __Y, 5,
9828 (__mmask8) -1);
936c0fe4
AI
9829}
9830
6b62f323
JJ
9831extern __inline __mmask8
9832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9833_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
936c0fe4 9834{
6b62f323
JJ
9835 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9836 (__v2di) __Y, 2,
9837 (__mmask8) __M);
936c0fe4
AI
9838}
9839
6b62f323
JJ
9840extern __inline __mmask8
9841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9842_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
936c0fe4 9843{
6b62f323
JJ
9844 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9845 (__v2di) __Y, 2,
9846 (__mmask8) -1);
936c0fe4
AI
9847}
9848
6b62f323 9849#ifdef __OPTIMIZE__
395a191d
SP
9850extern __inline __m256i
9851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9852_mm256_permutex_epi64 (__m256i __X, const int __I)
9853{
9854 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9855 __I,
9856 (__v4di)
9857 _mm256_setzero_si256(),
9858 (__mmask8) -1);
9859}
9860
6b62f323 9861extern __inline __m256i
936c0fe4 9862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9863_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9864 __m256i __X, const int __I)
936c0fe4 9865{
6b62f323
JJ
9866 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9867 __I,
9868 (__v4di) __W,
9869 (__mmask8) __M);
936c0fe4
AI
9870}
9871
6b62f323 9872extern __inline __m256i
936c0fe4 9873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9874_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
936c0fe4 9875{
6b62f323
JJ
9876 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9877 __I,
9878 (__v4di)
9879 _mm256_setzero_si256 (),
9880 (__mmask8) __M);
936c0fe4
AI
9881}
9882
6b62f323 9883extern __inline __m256d
936c0fe4 9884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9885_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9886 __m256d __B, const int __imm)
936c0fe4 9887{
6b62f323
JJ
9888 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9889 (__v4df) __B, __imm,
9890 (__v4df) __W,
9891 (__mmask8) __U);
936c0fe4
AI
9892}
9893
6b62f323 9894extern __inline __m256d
936c0fe4 9895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9896_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9897 const int __imm)
936c0fe4 9898{
6b62f323
JJ
9899 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9900 (__v4df) __B, __imm,
9901 (__v4df)
9902 _mm256_setzero_pd (),
9903 (__mmask8) __U);
936c0fe4
AI
9904}
9905
6b62f323 9906extern __inline __m128d
936c0fe4 9907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9908_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9909 __m128d __B, const int __imm)
936c0fe4 9910{
6b62f323
JJ
9911 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9912 (__v2df) __B, __imm,
9913 (__v2df) __W,
9914 (__mmask8) __U);
936c0fe4
AI
9915}
9916
6b62f323 9917extern __inline __m128d
936c0fe4 9918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9919_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9920 const int __imm)
936c0fe4 9921{
6b62f323
JJ
9922 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9923 (__v2df) __B, __imm,
9924 (__v2df)
9925 _mm_setzero_pd (),
9926 (__mmask8) __U);
936c0fe4
AI
9927}
9928
9929extern __inline __m256
9930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9931_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9932 __m256 __B, const int __imm)
936c0fe4 9933{
6b62f323
JJ
9934 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9935 (__v8sf) __B, __imm,
9936 (__v8sf) __W,
9937 (__mmask8) __U);
936c0fe4
AI
9938}
9939
6b62f323 9940extern __inline __m256
936c0fe4 9941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9942_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9943 const int __imm)
936c0fe4 9944{
6b62f323
JJ
9945 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9946 (__v8sf) __B, __imm,
9947 (__v8sf)
9948 _mm256_setzero_ps (),
9949 (__mmask8) __U);
936c0fe4
AI
9950}
9951
6b62f323 9952extern __inline __m128
936c0fe4 9953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9954_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9955 const int __imm)
936c0fe4 9956{
6b62f323
JJ
9957 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9958 (__v4sf) __B, __imm,
9959 (__v4sf) __W,
9960 (__mmask8) __U);
936c0fe4
AI
9961}
9962
6b62f323 9963extern __inline __m128
936c0fe4 9964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9965_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9966 const int __imm)
936c0fe4 9967{
6b62f323
JJ
9968 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9969 (__v4sf) __B, __imm,
9970 (__v4sf)
9971 _mm_setzero_ps (),
9972 (__mmask8) __U);
936c0fe4
AI
9973}
9974
6b62f323 9975extern __inline __m256i
936c0fe4 9976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 9977_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
936c0fe4 9978{
6b62f323
JJ
9979 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9980 (__v4si) __B,
9981 __imm,
9982 (__v8si)
9983 _mm256_setzero_si256 (),
9984 (__mmask8) -1);
936c0fe4
AI
9985}
9986
6b62f323 9987extern __inline __m256i
936c0fe4 9988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
9989_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9990 __m128i __B, const int __imm)
936c0fe4 9991{
6b62f323
JJ
9992 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9993 (__v4si) __B,
9994 __imm,
9995 (__v8si) __W,
9996 (__mmask8)
9997 __U);
936c0fe4
AI
9998}
9999
6b62f323 10000extern __inline __m256i
936c0fe4 10001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10002_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
10003 const int __imm)
936c0fe4 10004{
6b62f323
JJ
10005 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
10006 (__v4si) __B,
10007 __imm,
10008 (__v8si)
10009 _mm256_setzero_si256 (),
10010 (__mmask8)
10011 __U);
936c0fe4
AI
10012}
10013
6b62f323 10014extern __inline __m256
936c0fe4 10015__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10016_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
936c0fe4 10017{
6b62f323
JJ
10018 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10019 (__v4sf) __B,
936c0fe4 10020 __imm,
6b62f323
JJ
10021 (__v8sf)
10022 _mm256_setzero_ps (),
936c0fe4
AI
10023 (__mmask8) -1);
10024}
10025
6b62f323 10026extern __inline __m256
936c0fe4 10027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10028_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10029 __m128 __B, const int __imm)
936c0fe4 10030{
6b62f323
JJ
10031 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10032 (__v4sf) __B,
936c0fe4 10033 __imm,
6b62f323 10034 (__v8sf) __W,
936c0fe4
AI
10035 (__mmask8) __U);
10036}
10037
6b62f323 10038extern __inline __m256
936c0fe4 10039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10040_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
10041 const int __imm)
936c0fe4 10042{
6b62f323
JJ
10043 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10044 (__v4sf) __B,
936c0fe4 10045 __imm,
6b62f323
JJ
10046 (__v8sf)
10047 _mm256_setzero_ps (),
936c0fe4
AI
10048 (__mmask8) __U);
10049}
10050
6b62f323 10051extern __inline __m128i
936c0fe4 10052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10053_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
936c0fe4 10054{
6b62f323
JJ
10055 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10056 __imm,
10057 (__v4si)
10058 _mm_setzero_si128 (),
10059 (__mmask8) -1);
936c0fe4
AI
10060}
10061
6b62f323 10062extern __inline __m128i
936c0fe4 10063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10064_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
10065 const int __imm)
936c0fe4 10066{
6b62f323
JJ
10067 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10068 __imm,
10069 (__v4si) __W,
10070 (__mmask8)
10071 __U);
936c0fe4
AI
10072}
10073
6b62f323 10074extern __inline __m128i
936c0fe4 10075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10076_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
10077 const int __imm)
936c0fe4 10078{
6b62f323
JJ
10079 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10080 __imm,
10081 (__v4si)
10082 _mm_setzero_si128 (),
10083 (__mmask8)
10084 __U);
936c0fe4
AI
10085}
10086
10087extern __inline __m128
10088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10089_mm256_extractf32x4_ps (__m256 __A, const int __imm)
936c0fe4 10090{
6b62f323
JJ
10091 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10092 __imm,
10093 (__v4sf)
10094 _mm_setzero_ps (),
10095 (__mmask8) -1);
936c0fe4
AI
10096}
10097
10098extern __inline __m128
10099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10100_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
10101 const int __imm)
936c0fe4 10102{
6b62f323
JJ
10103 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10104 __imm,
10105 (__v4sf) __W,
10106 (__mmask8)
10107 __U);
936c0fe4
AI
10108}
10109
10110extern __inline __m128
10111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10112_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
10113 const int __imm)
10114{
10115 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10116 __imm,
10117 (__v4sf)
10118 _mm_setzero_ps (),
10119 (__mmask8)
10120 __U);
10121}
10122
10123extern __inline __m256i
10124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10125_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
10126{
10127 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10128 (__v4di) __B,
10129 __imm,
10130 (__v4di)
10131 _mm256_setzero_si256 (),
10132 (__mmask8) -1);
10133}
10134
10135extern __inline __m256i
10136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10137_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
10138 __m256i __B, const int __imm)
936c0fe4 10139{
6b62f323
JJ
10140 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10141 (__v4di) __B,
10142 __imm,
10143 (__v4di) __W,
10144 (__mmask8) __U);
936c0fe4
AI
10145}
10146
6b62f323 10147extern __inline __m256i
936c0fe4 10148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10149_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
10150 const int __imm)
936c0fe4 10151{
6b62f323
JJ
10152 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10153 (__v4di) __B,
10154 __imm,
10155 (__v4di)
10156 _mm256_setzero_si256 (),
10157 (__mmask8) __U);
936c0fe4
AI
10158}
10159
6b62f323 10160extern __inline __m256i
936c0fe4 10161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10162_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 10163{
6b62f323
JJ
10164 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10165 (__v8si) __B,
10166 __imm,
10167 (__v8si)
10168 _mm256_setzero_si256 (),
10169 (__mmask8) -1);
936c0fe4
AI
10170}
10171
6b62f323 10172extern __inline __m256i
936c0fe4 10173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10174_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
10175 __m256i __B, const int __imm)
936c0fe4 10176{
6b62f323
JJ
10177 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10178 (__v8si) __B,
10179 __imm,
10180 (__v8si) __W,
10181 (__mmask8) __U);
936c0fe4
AI
10182}
10183
6b62f323 10184extern __inline __m256i
936c0fe4 10185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10186_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
10187 const int __imm)
936c0fe4 10188{
6b62f323
JJ
10189 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10190 (__v8si) __B,
10191 __imm,
10192 (__v8si)
10193 _mm256_setzero_si256 (),
10194 (__mmask8) __U);
936c0fe4
AI
10195}
10196
6b62f323 10197extern __inline __m256d
936c0fe4 10198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10199_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
936c0fe4 10200{
6b62f323
JJ
10201 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10202 (__v4df) __B,
10203 __imm,
10204 (__v4df)
10205 _mm256_setzero_pd (),
10206 (__mmask8) -1);
936c0fe4
AI
10207}
10208
6b62f323 10209extern __inline __m256d
936c0fe4 10210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10211_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
10212 __m256d __B, const int __imm)
936c0fe4 10213{
6b62f323
JJ
10214 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10215 (__v4df) __B,
10216 __imm,
10217 (__v4df) __W,
10218 (__mmask8) __U);
936c0fe4
AI
10219}
10220
6b62f323 10221extern __inline __m256d
936c0fe4 10222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10223_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
10224 const int __imm)
936c0fe4 10225{
6b62f323
JJ
10226 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10227 (__v4df) __B,
10228 __imm,
10229 (__v4df)
10230 _mm256_setzero_pd (),
10231 (__mmask8) __U);
936c0fe4
AI
10232}
10233
6b62f323 10234extern __inline __m256
936c0fe4 10235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10236_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
936c0fe4 10237{
6b62f323
JJ
10238 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10239 (__v8sf) __B,
10240 __imm,
10241 (__v8sf)
10242 _mm256_setzero_ps (),
10243 (__mmask8) -1);
936c0fe4
AI
10244}
10245
6b62f323 10246extern __inline __m256
936c0fe4 10247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10248_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10249 __m256 __B, const int __imm)
936c0fe4 10250{
6b62f323
JJ
10251 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10252 (__v8sf) __B,
10253 __imm,
10254 (__v8sf) __W,
10255 (__mmask8) __U);
936c0fe4
AI
10256}
10257
6b62f323 10258extern __inline __m256
936c0fe4 10259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10260_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
10261 const int __imm)
936c0fe4 10262{
6b62f323
JJ
10263 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10264 (__v8sf) __B,
10265 __imm,
10266 (__v8sf)
10267 _mm256_setzero_ps (),
10268 (__mmask8) __U);
936c0fe4
AI
10269}
10270
6b62f323 10271extern __inline __m256d
936c0fe4 10272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 10273_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
6b62f323 10274 const int __imm)
936c0fe4 10275{
040d2bba
WX
10276 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10277 (__v4df) __B,
10278 (__v4di) __C,
10279 __imm,
10280 (__mmask8) -1);
936c0fe4
AI
10281}
10282
6b62f323 10283extern __inline __m256d
936c0fe4 10284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
10285_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
10286 __m256i __C, const int __imm)
936c0fe4 10287{
6b62f323 10288 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
040d2bba
WX
10289 (__v4df) __B,
10290 (__v4di) __C,
6b62f323
JJ
10291 __imm,
10292 (__mmask8) __U);
936c0fe4
AI
10293}
10294
10295extern __inline __m256d
10296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
10297_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
10298 __m256i __C, const int __imm)
936c0fe4 10299{
6b62f323 10300 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
040d2bba
WX
10301 (__v4df) __B,
10302 (__v4di) __C,
6b62f323
JJ
10303 __imm,
10304 (__mmask8) __U);
936c0fe4
AI
10305}
10306
6b62f323 10307extern __inline __m256
936c0fe4 10308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 10309_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
6b62f323 10310 const int __imm)
936c0fe4 10311{
040d2bba
WX
10312 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10313 (__v8sf) __B,
10314 (__v8si) __C,
10315 __imm,
10316 (__mmask8) -1);
936c0fe4
AI
10317}
10318
6b62f323 10319extern __inline __m256
936c0fe4 10320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
10321_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
10322 __m256i __C, const int __imm)
936c0fe4 10323{
6b62f323 10324 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
040d2bba
WX
10325 (__v8sf) __B,
10326 (__v8si) __C,
6b62f323
JJ
10327 __imm,
10328 (__mmask8) __U);
936c0fe4
AI
10329}
10330
6b62f323 10331extern __inline __m256
936c0fe4 10332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
10333_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
10334 __m256i __C, const int __imm)
936c0fe4 10335{
6b62f323 10336 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
040d2bba
WX
10337 (__v8sf) __B,
10338 (__v8si) __C,
6b62f323
JJ
10339 __imm,
10340 (__mmask8) __U);
936c0fe4
AI
10341}
10342
6b62f323 10343extern __inline __m128d
936c0fe4 10344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 10345_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
6b62f323 10346 const int __imm)
936c0fe4 10347{
040d2bba
WX
10348 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10349 (__v2df) __B,
10350 (__v2di) __C,
10351 __imm,
10352 (__mmask8) -1);
936c0fe4
AI
10353}
10354
6b62f323 10355extern __inline __m128d
936c0fe4 10356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
10357_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
10358 __m128i __C, const int __imm)
936c0fe4 10359{
6b62f323 10360 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
040d2bba
WX
10361 (__v2df) __B,
10362 (__v2di) __C,
6b62f323
JJ
10363 __imm,
10364 (__mmask8) __U);
936c0fe4
AI
10365}
10366
6b62f323 10367extern __inline __m128d
936c0fe4 10368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
10369_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
10370 __m128i __C, const int __imm)
936c0fe4 10371{
6b62f323 10372 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
040d2bba
WX
10373 (__v2df) __B,
10374 (__v2di) __C,
6b62f323
JJ
10375 __imm,
10376 (__mmask8) __U);
936c0fe4
AI
10377}
10378
6b62f323 10379extern __inline __m128
936c0fe4 10380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba 10381_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
936c0fe4 10382{
040d2bba
WX
10383 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10384 (__v4sf) __B,
10385 (__v4si) __C,
10386 __imm,
10387 (__mmask8) -1);
936c0fe4
AI
10388}
10389
6b62f323 10390extern __inline __m128
936c0fe4 10391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
10392_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
10393 __m128i __C, const int __imm)
936c0fe4 10394{
6b62f323 10395 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
040d2bba
WX
10396 (__v4sf) __B,
10397 (__v4si) __C,
6b62f323
JJ
10398 __imm,
10399 (__mmask8) __U);
936c0fe4
AI
10400}
10401
6b62f323 10402extern __inline __m128
936c0fe4 10403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
040d2bba
WX
10404_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
10405 __m128i __C, const int __imm)
936c0fe4 10406{
6b62f323 10407 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
040d2bba
WX
10408 (__v4sf) __B,
10409 (__v4si) __C,
6b62f323
JJ
10410 __imm,
10411 (__mmask8) __U);
936c0fe4
AI
10412}
10413
6b62f323 10414extern __inline __m256i
936c0fe4 10415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10416_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10417 const int __imm)
936c0fe4 10418{
6b62f323
JJ
10419 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10420 (__v8si) __W,
10421 (__mmask8) __U);
936c0fe4
AI
10422}
10423
6b62f323 10424extern __inline __m256i
936c0fe4 10425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10426_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
936c0fe4 10427{
6b62f323
JJ
10428 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10429 (__v8si)
10430 _mm256_setzero_si256 (),
10431 (__mmask8) __U);
936c0fe4
AI
10432}
10433
6b62f323 10434extern __inline __m128i
936c0fe4 10435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10436_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10437 const int __imm)
936c0fe4 10438{
6b62f323
JJ
10439 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10440 (__v4si) __W,
10441 (__mmask8) __U);
936c0fe4
AI
10442}
10443
6b62f323 10444extern __inline __m128i
936c0fe4 10445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10446_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
936c0fe4 10447{
6b62f323
JJ
10448 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10449 (__v4si)
10450 _mm_setzero_si128 (),
10451 (__mmask8) __U);
936c0fe4
AI
10452}
10453
6b62f323 10454extern __inline __m256i
936c0fe4 10455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10456_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10457 const int __imm)
936c0fe4 10458{
6b62f323
JJ
10459 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10460 (__v4di) __W,
10461 (__mmask8) __U);
936c0fe4
AI
10462}
10463
6b62f323 10464extern __inline __m256i
936c0fe4 10465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10466_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
936c0fe4 10467{
6b62f323
JJ
10468 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10469 (__v4di)
10470 _mm256_setzero_si256 (),
10471 (__mmask8) __U);
936c0fe4
AI
10472}
10473
6b62f323 10474extern __inline __m128i
936c0fe4 10475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10476_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10477 const int __imm)
936c0fe4 10478{
6b62f323
JJ
10479 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10480 (__v2di) __W,
10481 (__mmask8) __U);
936c0fe4
AI
10482}
10483
6b62f323 10484extern __inline __m128i
936c0fe4 10485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10486_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
936c0fe4 10487{
6b62f323
JJ
10488 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10489 (__v2di)
10490 _mm_setzero_si128 (),
10491 (__mmask8) __U);
936c0fe4
AI
10492}
10493
6b62f323 10494extern __inline __m256i
936c0fe4 10495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10496_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
10497 const int __imm)
936c0fe4 10498{
6b62f323
JJ
10499 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10500 (__v4di) __B,
10501 (__v4di) __C, __imm,
10502 (__mmask8) -1);
936c0fe4
AI
10503}
10504
6b62f323 10505extern __inline __m256i
936c0fe4 10506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10507_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
10508 __m256i __B, __m256i __C,
10509 const int __imm)
936c0fe4 10510{
6b62f323
JJ
10511 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10512 (__v4di) __B,
10513 (__v4di) __C, __imm,
10514 (__mmask8) __U);
936c0fe4
AI
10515}
10516
6b62f323 10517extern __inline __m256i
936c0fe4 10518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10519_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
10520 __m256i __B, __m256i __C,
10521 const int __imm)
936c0fe4 10522{
6b62f323
JJ
10523 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
10524 (__v4di) __B,
10525 (__v4di) __C,
10526 __imm,
10527 (__mmask8) __U);
936c0fe4
AI
10528}
10529
6b62f323 10530extern __inline __m256i
936c0fe4 10531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10532_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
10533 const int __imm)
936c0fe4 10534{
6b62f323
JJ
10535 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10536 (__v8si) __B,
10537 (__v8si) __C, __imm,
10538 (__mmask8) -1);
936c0fe4
AI
10539}
10540
6b62f323 10541extern __inline __m256i
936c0fe4 10542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10543_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
10544 __m256i __B, __m256i __C,
10545 const int __imm)
936c0fe4 10546{
6b62f323
JJ
10547 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10548 (__v8si) __B,
10549 (__v8si) __C, __imm,
10550 (__mmask8) __U);
936c0fe4
AI
10551}
10552
6b62f323 10553extern __inline __m256i
936c0fe4 10554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10555_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
10556 __m256i __B, __m256i __C,
10557 const int __imm)
936c0fe4 10558{
6b62f323
JJ
10559 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
10560 (__v8si) __B,
10561 (__v8si) __C,
10562 __imm,
10563 (__mmask8) __U);
936c0fe4
AI
10564}
10565
6b62f323 10566extern __inline __m128i
936c0fe4 10567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10568_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
10569 const int __imm)
936c0fe4 10570{
6b62f323
JJ
10571 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10572 (__v2di) __B,
10573 (__v2di) __C, __imm,
10574 (__mmask8) -1);
936c0fe4
AI
10575}
10576
6b62f323 10577extern __inline __m128i
936c0fe4 10578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10579_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
10580 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10581{
6b62f323
JJ
10582 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10583 (__v2di) __B,
10584 (__v2di) __C, __imm,
10585 (__mmask8) __U);
936c0fe4
AI
10586}
10587
6b62f323 10588extern __inline __m128i
936c0fe4 10589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10590_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
10591 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10592{
6b62f323
JJ
10593 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
10594 (__v2di) __B,
10595 (__v2di) __C,
10596 __imm,
10597 (__mmask8) __U);
936c0fe4
AI
10598}
10599
6b62f323 10600extern __inline __m128i
936c0fe4 10601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10602_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
10603 const int __imm)
936c0fe4 10604{
6b62f323
JJ
10605 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10606 (__v4si) __B,
10607 (__v4si) __C, __imm,
10608 (__mmask8) -1);
936c0fe4
AI
10609}
10610
6b62f323 10611extern __inline __m128i
936c0fe4 10612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10613_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
10614 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10615{
6b62f323
JJ
10616 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10617 (__v4si) __B,
10618 (__v4si) __C, __imm,
10619 (__mmask8) __U);
936c0fe4
AI
10620}
10621
6b62f323 10622extern __inline __m128i
936c0fe4 10623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10624_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
10625 __m128i __B, __m128i __C, const int __imm)
936c0fe4 10626{
6b62f323
JJ
10627 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
10628 (__v4si) __B,
10629 (__v4si) __C,
10630 __imm,
10631 (__mmask8) __U);
936c0fe4
AI
10632}
10633
6b62f323 10634extern __inline __m256
936c0fe4 10635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10636_mm256_roundscale_ps (__m256 __A, const int __imm)
936c0fe4 10637{
6b62f323
JJ
10638 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10639 __imm,
10640 (__v8sf)
10641 _mm256_setzero_ps (),
10642 (__mmask8) -1);
936c0fe4
AI
10643}
10644
6b62f323 10645extern __inline __m256
936c0fe4 10646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10647_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
10648 const int __imm)
936c0fe4 10649{
6b62f323
JJ
10650 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10651 __imm,
10652 (__v8sf) __W,
10653 (__mmask8) __U);
936c0fe4
AI
10654}
10655
6b62f323 10656extern __inline __m256
936c0fe4 10657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10658_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
936c0fe4 10659{
6b62f323
JJ
10660 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10661 __imm,
10662 (__v8sf)
10663 _mm256_setzero_ps (),
10664 (__mmask8) __U);
936c0fe4
AI
10665}
10666
6b62f323 10667extern __inline __m256d
936c0fe4 10668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10669_mm256_roundscale_pd (__m256d __A, const int __imm)
936c0fe4 10670{
6b62f323
JJ
10671 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10672 __imm,
10673 (__v4df)
10674 _mm256_setzero_pd (),
10675 (__mmask8) -1);
936c0fe4
AI
10676}
10677
6b62f323 10678extern __inline __m256d
936c0fe4 10679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10680_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
10681 const int __imm)
936c0fe4 10682{
6b62f323
JJ
10683 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10684 __imm,
10685 (__v4df) __W,
10686 (__mmask8) __U);
936c0fe4
AI
10687}
10688
6b62f323 10689extern __inline __m256d
936c0fe4 10690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10691_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
936c0fe4 10692{
6b62f323
JJ
10693 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10694 __imm,
10695 (__v4df)
10696 _mm256_setzero_pd (),
10697 (__mmask8) __U);
936c0fe4
AI
10698}
10699
6b62f323 10700extern __inline __m128
936c0fe4 10701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10702_mm_roundscale_ps (__m128 __A, const int __imm)
936c0fe4 10703{
6b62f323
JJ
10704 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10705 __imm,
10706 (__v4sf)
10707 _mm_setzero_ps (),
10708 (__mmask8) -1);
936c0fe4
AI
10709}
10710
6b62f323 10711extern __inline __m128
936c0fe4 10712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10713_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10714 const int __imm)
936c0fe4 10715{
6b62f323
JJ
10716 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10717 __imm,
10718 (__v4sf) __W,
10719 (__mmask8) __U);
936c0fe4
AI
10720}
10721
6b62f323 10722extern __inline __m128
936c0fe4 10723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10724_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
936c0fe4 10725{
6b62f323
JJ
10726 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10727 __imm,
10728 (__v4sf)
10729 _mm_setzero_ps (),
10730 (__mmask8) __U);
936c0fe4
AI
10731}
10732
6b62f323 10733extern __inline __m128d
936c0fe4 10734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10735_mm_roundscale_pd (__m128d __A, const int __imm)
936c0fe4 10736{
6b62f323
JJ
10737 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10738 __imm,
10739 (__v2df)
10740 _mm_setzero_pd (),
10741 (__mmask8) -1);
936c0fe4
AI
10742}
10743
6b62f323 10744extern __inline __m128d
936c0fe4 10745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10746_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10747 const int __imm)
936c0fe4 10748{
6b62f323
JJ
10749 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10750 __imm,
10751 (__v2df) __W,
10752 (__mmask8) __U);
936c0fe4
AI
10753}
10754
6b62f323 10755extern __inline __m128d
936c0fe4 10756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 10757_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
936c0fe4 10758{
6b62f323
JJ
10759 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10760 __imm,
10761 (__v2df)
10762 _mm_setzero_pd (),
10763 (__mmask8) __U);
936c0fe4
AI
10764}
10765
6b62f323 10766extern __inline __m256
936c0fe4 10767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10768_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10769 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10770{
6b62f323
JJ
10771 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10772 (__C << 2) | __B,
10773 (__v8sf)
10774 _mm256_setzero_ps (),
10775 (__mmask8) -1);
936c0fe4
AI
10776}
10777
6b62f323 10778extern __inline __m256
936c0fe4 10779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10780_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10781 _MM_MANTISSA_NORM_ENUM __B,
10782 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10783{
6b62f323
JJ
10784 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10785 (__C << 2) | __B,
10786 (__v8sf) __W,
10787 (__mmask8) __U);
936c0fe4
AI
10788}
10789
6b62f323 10790extern __inline __m256
936c0fe4 10791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10792_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10793 _MM_MANTISSA_NORM_ENUM __B,
10794 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10795{
6b62f323
JJ
10796 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10797 (__C << 2) | __B,
10798 (__v8sf)
10799 _mm256_setzero_ps (),
10800 (__mmask8) __U);
936c0fe4
AI
10801}
10802
6b62f323 10803extern __inline __m128
936c0fe4 10804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10805_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10806 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10807{
6b62f323
JJ
10808 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10809 (__C << 2) | __B,
10810 (__v4sf)
10811 _mm_setzero_ps (),
10812 (__mmask8) -1);
936c0fe4
AI
10813}
10814
6b62f323 10815extern __inline __m128
936c0fe4 10816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10817_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10818 _MM_MANTISSA_NORM_ENUM __B,
10819 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10820{
6b62f323
JJ
10821 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10822 (__C << 2) | __B,
10823 (__v4sf) __W,
10824 (__mmask8) __U);
936c0fe4
AI
10825}
10826
6b62f323 10827extern __inline __m128
936c0fe4 10828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10829_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10830 _MM_MANTISSA_NORM_ENUM __B,
10831 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10832{
6b62f323
JJ
10833 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10834 (__C << 2) | __B,
10835 (__v4sf)
10836 _mm_setzero_ps (),
10837 (__mmask8) __U);
936c0fe4
AI
10838}
10839
6b62f323 10840extern __inline __m256d
936c0fe4 10841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10842_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10843 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10844{
6b62f323
JJ
10845 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10846 (__C << 2) | __B,
10847 (__v4df)
10848 _mm256_setzero_pd (),
10849 (__mmask8) -1);
936c0fe4
AI
10850}
10851
6b62f323 10852extern __inline __m256d
936c0fe4 10853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10854_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10855 _MM_MANTISSA_NORM_ENUM __B,
10856 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10857{
6b62f323
JJ
10858 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10859 (__C << 2) | __B,
10860 (__v4df) __W,
10861 (__mmask8) __U);
936c0fe4
AI
10862}
10863
6b62f323 10864extern __inline __m256d
936c0fe4 10865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10866_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10867 _MM_MANTISSA_NORM_ENUM __B,
10868 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10869{
6b62f323
JJ
10870 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10871 (__C << 2) | __B,
10872 (__v4df)
10873 _mm256_setzero_pd (),
10874 (__mmask8) __U);
936c0fe4
AI
10875}
10876
6b62f323 10877extern __inline __m128d
936c0fe4 10878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10879_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10880 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10881{
6b62f323
JJ
10882 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10883 (__C << 2) | __B,
10884 (__v2df)
10885 _mm_setzero_pd (),
10886 (__mmask8) -1);
936c0fe4
AI
10887}
10888
6b62f323 10889extern __inline __m128d
936c0fe4 10890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10891_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10892 _MM_MANTISSA_NORM_ENUM __B,
10893 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10894{
6b62f323
JJ
10895 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10896 (__C << 2) | __B,
10897 (__v2df) __W,
10898 (__mmask8) __U);
936c0fe4
AI
10899}
10900
6b62f323 10901extern __inline __m128d
936c0fe4 10902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10903_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10904 _MM_MANTISSA_NORM_ENUM __B,
10905 _MM_MANTISSA_SIGN_ENUM __C)
936c0fe4 10906{
6b62f323
JJ
10907 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10908 (__C << 2) | __B,
10909 (__v2df)
10910 _mm_setzero_pd (),
10911 (__mmask8) __U);
936c0fe4
AI
10912}
10913
6b62f323 10914extern __inline __m256
936c0fe4 10915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10916_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10917 __m256i __index, void const *__addr,
10918 int __scale)
936c0fe4 10919{
6b62f323
JJ
10920 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10921 __addr,
10922 (__v8si) __index,
10923 __mask, __scale);
936c0fe4
AI
10924}
10925
6b62f323 10926extern __inline __m128
936c0fe4 10927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10928_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10929 __m128i __index, void const *__addr,
10930 int __scale)
936c0fe4 10931{
6b62f323
JJ
10932 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10933 __addr,
10934 (__v4si) __index,
10935 __mask, __scale);
936c0fe4
AI
10936}
10937
6b62f323 10938extern __inline __m256d
936c0fe4 10939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10940_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10941 __m128i __index, void const *__addr,
10942 int __scale)
936c0fe4 10943{
6b62f323
JJ
10944 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10945 __addr,
10946 (__v4si) __index,
10947 __mask, __scale);
936c0fe4
AI
10948}
10949
6b62f323 10950extern __inline __m128d
936c0fe4 10951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10952_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10953 __m128i __index, void const *__addr,
10954 int __scale)
936c0fe4 10955{
6b62f323
JJ
10956 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10957 __addr,
10958 (__v4si) __index,
10959 __mask, __scale);
936c0fe4
AI
10960}
10961
6b62f323 10962extern __inline __m128
936c0fe4 10963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10964_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10965 __m256i __index, void const *__addr,
10966 int __scale)
10967{
10968 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10969 __addr,
10970 (__v4di) __index,
10971 __mask, __scale);
936c0fe4
AI
10972}
10973
6b62f323 10974extern __inline __m128
936c0fe4 10975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10976_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10977 __m128i __index, void const *__addr,
10978 int __scale)
936c0fe4 10979{
6b62f323
JJ
10980 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10981 __addr,
10982 (__v2di) __index,
10983 __mask, __scale);
936c0fe4
AI
10984}
10985
6b62f323 10986extern __inline __m256d
936c0fe4 10987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
10988_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10989 __m256i __index, void const *__addr,
10990 int __scale)
936c0fe4 10991{
6b62f323
JJ
10992 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10993 __addr,
10994 (__v4di) __index,
10995 __mask, __scale);
936c0fe4
AI
10996}
10997
6b62f323 10998extern __inline __m128d
936c0fe4 10999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11000_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
11001 __m128i __index, void const *__addr,
11002 int __scale)
936c0fe4 11003{
6b62f323
JJ
11004 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
11005 __addr,
11006 (__v2di) __index,
11007 __mask, __scale);
936c0fe4
AI
11008}
11009
/* Thin wrapper: passes __v1_old (cast to __v8si), __addr, __index (cast
   to __v8si), __mask and __scale to __builtin_ia32_gather3siv8si and
   returns the result cast to __m256i.  */
6b62f323 11010extern __inline __m256i
936c0fe4 11011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11012_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
11013 __m256i __index, void const *__addr,
11014 int __scale)
936c0fe4 11015{
6b62f323
JJ
11016 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
11017 __addr,
11018 (__v8si) __index,
11019 __mask, __scale);
936c0fe4
AI
11020}
11021
/* Thin wrapper: passes __v1_old (cast to __v4si), __addr, __index (cast
   to __v4si), __mask and __scale to __builtin_ia32_gather3siv4si and
   returns the result cast to __m128i.  */
11022extern __inline __m128i
11023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11024_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11025 __m128i __index, void const *__addr,
11026 int __scale)
936c0fe4 11027{
6b62f323
JJ
11028 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
11029 __addr,
11030 (__v4si) __index,
11031 __mask, __scale);
936c0fe4
AI
11032}
11033
/* Thin wrapper: passes __v1_old (cast to __v4di), __addr, the 128-bit
   __index (cast to __v4si), __mask and __scale to
   __builtin_ia32_gather3siv4di and returns the result cast to __m256i.  */
11034extern __inline __m256i
11035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11036_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11037 __m128i __index, void const *__addr,
11038 int __scale)
936c0fe4 11039{
6b62f323
JJ
11040 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
11041 __addr,
11042 (__v4si) __index,
11043 __mask, __scale);
936c0fe4
AI
11044}
11045
/* Thin wrapper: passes __v1_old (cast to __v2di), __addr, __index (cast
   to __v4si), __mask and __scale to __builtin_ia32_gather3siv2di and
   returns the result cast to __m128i.  */
6b62f323 11046extern __inline __m128i
936c0fe4 11047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11048_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11049 __m128i __index, void const *__addr,
11050 int __scale)
936c0fe4 11051{
6b62f323
JJ
11052 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
11053 __addr,
11054 (__v4si) __index,
11055 __mask, __scale);
936c0fe4
AI
11056}
11057
/* Thin wrapper: passes the 128-bit __v1_old (cast to __v4si), __addr, the
   256-bit __index (cast to __v4di), __mask and __scale to
   __builtin_ia32_gather3div8si and returns the result cast to __m128i.  */
6b62f323 11058extern __inline __m128i
936c0fe4 11059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11060_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11061 __m256i __index, void const *__addr,
11062 int __scale)
936c0fe4 11063{
6b62f323
JJ
11064 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
11065 __addr,
11066 (__v4di) __index,
11067 __mask, __scale);
936c0fe4
AI
11068}
11069
/* Thin wrapper: passes __v1_old (cast to __v4si), __addr, __index (cast
   to __v2di), __mask and __scale to __builtin_ia32_gather3div4si and
   returns the result cast to __m128i.  */
11070extern __inline __m128i
11071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11072_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11073 __m128i __index, void const *__addr,
11074 int __scale)
936c0fe4 11075{
6b62f323
JJ
11076 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
11077 __addr,
11078 (__v2di) __index,
11079 __mask, __scale);
936c0fe4
AI
11080}
11081
/* Thin wrapper: passes __v1_old (cast to __v4di), __addr, __index (cast
   to __v4di), __mask and __scale to __builtin_ia32_gather3div4di and
   returns the result cast to __m256i.  */
6b62f323 11082extern __inline __m256i
936c0fe4 11083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11084_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11085 __m256i __index, void const *__addr,
11086 int __scale)
936c0fe4 11087{
6b62f323
JJ
11088 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
11089 __addr,
11090 (__v4di) __index,
11091 __mask, __scale);
936c0fe4
AI
11092}
11093
/* Thin wrapper: passes __v1_old (cast to __v2di), __addr, __index (cast
   to __v2di), __mask and __scale to __builtin_ia32_gather3div2di and
   returns the result cast to __m128i.  */
11094extern __inline __m128i
11095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11096_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11097 __m128i __index, void const *__addr,
11098 int __scale)
936c0fe4 11099{
6b62f323
JJ
11100 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
11101 __addr,
11102 (__v2di) __index,
11103 __mask, __scale);
936c0fe4
AI
11104}
11105
/* Unmasked form: calls __builtin_ia32_scattersiv8sf with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v8si), __v1 (cast to
   __v8sf) and __scale.  Returns nothing.  */
6b62f323 11106extern __inline void
936c0fe4 11107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11108_mm256_i32scatter_ps (void *__addr, __m256i __index,
11109 __m256 __v1, const int __scale)
936c0fe4 11110{
6b62f323
JJ
11111 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
11112 (__v8si) __index, (__v8sf) __v1,
11113 __scale);
936c0fe4
AI
11114}
11115
/* Masked form: calls __builtin_ia32_scattersiv8sf with __addr, the
   caller's __mask, __index (cast to __v8si), __v1 (cast to __v8sf) and
   __scale.  Returns nothing.  */
6b62f323 11116extern __inline void
936c0fe4 11117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11118_mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11119 __m256i __index, __m256 __v1,
11120 const int __scale)
936c0fe4 11121{
6b62f323
JJ
11122 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
11123 (__v8sf) __v1, __scale);
936c0fe4
AI
11124}
11125
/* Unmasked form: calls __builtin_ia32_scattersiv4sf with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v4si), __v1 (cast to
   __v4sf) and __scale.  Returns nothing.  */
6b62f323 11126extern __inline void
936c0fe4 11127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11128_mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11129 const int __scale)
936c0fe4 11130{
6b62f323
JJ
11131 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
11132 (__v4si) __index, (__v4sf) __v1,
11133 __scale);
936c0fe4
AI
11134}
11135
/* Masked form: calls __builtin_ia32_scattersiv4sf with __addr, the
   caller's __mask, __index (cast to __v4si), __v1 (cast to __v4sf) and
   __scale.  Returns nothing.  */
6b62f323 11136extern __inline void
936c0fe4 11137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11138_mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11139 __m128i __index, __m128 __v1,
11140 const int __scale)
936c0fe4 11141{
6b62f323
JJ
11142 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
11143 (__v4sf) __v1, __scale);
936c0fe4
AI
11144}
11145
/* Unmasked form: calls __builtin_ia32_scattersiv4df with __addr, the
   constant mask (__mmask8) 0xFF, the 128-bit __index (cast to __v4si),
   __v1 (cast to __v4df) and __scale.  Returns nothing.  */
6b62f323 11146extern __inline void
936c0fe4 11147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11148_mm256_i32scatter_pd (void *__addr, __m128i __index,
11149 __m256d __v1, const int __scale)
936c0fe4 11150{
6b62f323
JJ
11151 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
11152 (__v4si) __index, (__v4df) __v1,
11153 __scale);
936c0fe4
AI
11154}
11155
/* Masked form: calls __builtin_ia32_scattersiv4df with __addr, the
   caller's __mask, the 128-bit __index (cast to __v4si), __v1 (cast to
   __v4df) and __scale.  Returns nothing.  */
6b62f323 11156extern __inline void
936c0fe4 11157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11158_mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11159 __m128i __index, __m256d __v1,
11160 const int __scale)
936c0fe4 11161{
6b62f323
JJ
11162 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
11163 (__v4df) __v1, __scale);
936c0fe4
AI
11164}
11165
/* Unmasked form: calls __builtin_ia32_scattersiv2df with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v4si), __v1 (cast to
   __v2df) and __scale.  Returns nothing.  */
6b62f323 11166extern __inline void
936c0fe4 11167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11168_mm_i32scatter_pd (void *__addr, __m128i __index,
11169 __m128d __v1, const int __scale)
936c0fe4 11170{
6b62f323
JJ
11171 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
11172 (__v4si) __index, (__v2df) __v1,
11173 __scale);
936c0fe4
AI
11174}
11175
/* Masked form: calls __builtin_ia32_scattersiv2df with __addr, the
   caller's __mask, __index (cast to __v4si), __v1 (cast to __v2df) and
   __scale.  Returns nothing.  */
6b62f323 11176extern __inline void
936c0fe4 11177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11178_mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11179 __m128i __index, __m128d __v1,
11180 const int __scale)
936c0fe4 11181{
6b62f323
JJ
11182 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
11183 (__v2df) __v1, __scale);
936c0fe4
AI
11184}
11185
/* Unmasked form: calls __builtin_ia32_scatterdiv8sf with __addr, the
   constant mask (__mmask8) 0xFF, the 256-bit __index (cast to __v4di),
   the 128-bit __v1 (cast to __v4sf) and __scale.  Returns nothing.  */
6b62f323 11186extern __inline void
936c0fe4 11187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11188_mm256_i64scatter_ps (void *__addr, __m256i __index,
11189 __m128 __v1, const int __scale)
936c0fe4 11190{
6b62f323
JJ
11191 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
11192 (__v4di) __index, (__v4sf) __v1,
11193 __scale);
936c0fe4
AI
11194}
11195
/* Masked form: calls __builtin_ia32_scatterdiv8sf with __addr, the
   caller's __mask, the 256-bit __index (cast to __v4di), the 128-bit __v1
   (cast to __v4sf) and __scale.  Returns nothing.  */
6b62f323 11196extern __inline void
936c0fe4 11197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11198_mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11199 __m256i __index, __m128 __v1,
11200 const int __scale)
936c0fe4 11201{
6b62f323
JJ
11202 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
11203 (__v4sf) __v1, __scale);
936c0fe4
AI
11204}
11205
/* Unmasked form: calls __builtin_ia32_scatterdiv4sf with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v2di), __v1 (cast to
   __v4sf) and __scale.  Returns nothing.  */
6b62f323 11206extern __inline void
936c0fe4 11207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11208_mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11209 const int __scale)
936c0fe4 11210{
6b62f323
JJ
11211 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
11212 (__v2di) __index, (__v4sf) __v1,
11213 __scale);
936c0fe4
AI
11214}
11215
/* Masked form: calls __builtin_ia32_scatterdiv4sf with __addr, the
   caller's __mask, __index (cast to __v2di), __v1 (cast to __v4sf) and
   __scale.  Returns nothing.  */
6b62f323 11216extern __inline void
936c0fe4 11217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11218_mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11219 __m128i __index, __m128 __v1,
11220 const int __scale)
936c0fe4 11221{
6b62f323
JJ
11222 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
11223 (__v4sf) __v1, __scale);
936c0fe4
AI
11224}
11225
/* Unmasked form: calls __builtin_ia32_scatterdiv4df with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v4di), __v1 (cast to
   __v4df) and __scale.  Returns nothing.  */
6b62f323 11226extern __inline void
936c0fe4 11227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11228_mm256_i64scatter_pd (void *__addr, __m256i __index,
11229 __m256d __v1, const int __scale)
936c0fe4 11230{
6b62f323
JJ
11231 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
11232 (__v4di) __index, (__v4df) __v1,
11233 __scale);
936c0fe4
AI
11234}
11235
/* Masked form: calls __builtin_ia32_scatterdiv4df with __addr, the
   caller's __mask, __index (cast to __v4di), __v1 (cast to __v4df) and
   __scale.  Returns nothing.  */
6b62f323 11236extern __inline void
936c0fe4 11237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11238_mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11239 __m256i __index, __m256d __v1,
11240 const int __scale)
936c0fe4 11241{
6b62f323
JJ
11242 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
11243 (__v4df) __v1, __scale);
936c0fe4
AI
11244}
11245
/* Unmasked form: calls __builtin_ia32_scatterdiv2df with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v2di), __v1 (cast to
   __v2df) and __scale.  Returns nothing.  */
6b62f323 11246extern __inline void
936c0fe4 11247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11248_mm_i64scatter_pd (void *__addr, __m128i __index,
11249 __m128d __v1, const int __scale)
936c0fe4 11250{
6b62f323
JJ
11251 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
11252 (__v2di) __index, (__v2df) __v1,
11253 __scale);
936c0fe4
AI
11254}
11255
/* Masked form: calls __builtin_ia32_scatterdiv2df with __addr, the
   caller's __mask, __index (cast to __v2di), __v1 (cast to __v2df) and
   __scale.  Returns nothing.  */
6b62f323 11256extern __inline void
936c0fe4 11257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11258_mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11259 __m128i __index, __m128d __v1,
11260 const int __scale)
936c0fe4 11261{
6b62f323
JJ
11262 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
11263 (__v2df) __v1, __scale);
936c0fe4
AI
11264}
11265
/* Unmasked form: calls __builtin_ia32_scattersiv8si with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v8si), __v1 (cast to
   __v8si) and __scale.  Returns nothing.  */
6b62f323 11266extern __inline void
936c0fe4 11267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11268_mm256_i32scatter_epi32 (void *__addr, __m256i __index,
11269 __m256i __v1, const int __scale)
936c0fe4 11270{
6b62f323
JJ
11271 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
11272 (__v8si) __index, (__v8si) __v1,
11273 __scale);
936c0fe4
AI
11274}
11275
/* Masked form: calls __builtin_ia32_scattersiv8si with __addr, the
   caller's __mask, __index (cast to __v8si), __v1 (cast to __v8si) and
   __scale.  Returns nothing.  */
6b62f323 11276extern __inline void
936c0fe4 11277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11278_mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11279 __m256i __index, __m256i __v1,
11280 const int __scale)
936c0fe4 11281{
6b62f323
JJ
11282 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
11283 (__v8si) __v1, __scale);
936c0fe4
AI
11284}
11285
/* Unmasked form: calls __builtin_ia32_scattersiv4si with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v4si), __v1 (cast to
   __v4si) and __scale.  Returns nothing.  */
6b62f323 11286extern __inline void
936c0fe4 11287__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11288_mm_i32scatter_epi32 (void *__addr, __m128i __index,
11289 __m128i __v1, const int __scale)
936c0fe4 11290{
6b62f323
JJ
11291 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
11292 (__v4si) __index, (__v4si) __v1,
11293 __scale);
936c0fe4
AI
11294}
11295
/* Masked form: calls __builtin_ia32_scattersiv4si with __addr, the
   caller's __mask, __index (cast to __v4si), __v1 (cast to __v4si) and
   __scale.  Returns nothing.  */
6b62f323 11296extern __inline void
936c0fe4 11297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11298_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11299 __m128i __index, __m128i __v1,
11300 const int __scale)
936c0fe4 11301{
6b62f323
JJ
11302 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
11303 (__v4si) __v1, __scale);
936c0fe4
AI
11304}
11305
/* Unmasked form: calls __builtin_ia32_scattersiv4di with __addr, the
   constant mask (__mmask8) 0xFF, the 128-bit __index (cast to __v4si),
   __v1 (cast to __v4di) and __scale.  Returns nothing.  */
6b62f323 11306extern __inline void
936c0fe4 11307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11308_mm256_i32scatter_epi64 (void *__addr, __m128i __index,
11309 __m256i __v1, const int __scale)
936c0fe4 11310{
6b62f323
JJ
11311 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
11312 (__v4si) __index, (__v4di) __v1,
11313 __scale);
936c0fe4
AI
11314}
11315
/* Masked form: calls __builtin_ia32_scattersiv4di with __addr, the
   caller's __mask, the 128-bit __index (cast to __v4si), __v1 (cast to
   __v4di) and __scale.  Returns nothing.  */
6b62f323 11316extern __inline void
936c0fe4 11317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11318_mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11319 __m128i __index, __m256i __v1,
11320 const int __scale)
936c0fe4 11321{
6b62f323
JJ
11322 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
11323 (__v4di) __v1, __scale);
936c0fe4
AI
11324}
11325
/* Unmasked form: calls __builtin_ia32_scattersiv2di with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v4si), __v1 (cast to
   __v2di) and __scale.  Returns nothing.  */
6b62f323 11326extern __inline void
936c0fe4 11327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11328_mm_i32scatter_epi64 (void *__addr, __m128i __index,
11329 __m128i __v1, const int __scale)
936c0fe4 11330{
6b62f323
JJ
11331 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
11332 (__v4si) __index, (__v2di) __v1,
11333 __scale);
936c0fe4
AI
11334}
11335
/* Masked form: calls __builtin_ia32_scattersiv2di with __addr, the
   caller's __mask, __index (cast to __v4si), __v1 (cast to __v2di) and
   __scale.  Returns nothing.  */
6b62f323 11336extern __inline void
936c0fe4 11337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11338_mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11339 __m128i __index, __m128i __v1,
11340 const int __scale)
936c0fe4 11341{
6b62f323
JJ
11342 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
11343 (__v2di) __v1, __scale);
936c0fe4
AI
11344}
11345
/* Unmasked form: calls __builtin_ia32_scatterdiv8si with __addr, the
   constant mask (__mmask8) 0xFF, the 256-bit __index (cast to __v4di),
   the 128-bit __v1 (cast to __v4si) and __scale.  Returns nothing.  */
6b62f323 11346extern __inline void
936c0fe4 11347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11348_mm256_i64scatter_epi32 (void *__addr, __m256i __index,
11349 __m128i __v1, const int __scale)
936c0fe4 11350{
6b62f323
JJ
11351 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
11352 (__v4di) __index, (__v4si) __v1,
11353 __scale);
936c0fe4
AI
11354}
11355
/* Masked form: calls __builtin_ia32_scatterdiv8si with __addr, the
   caller's __mask, the 256-bit __index (cast to __v4di), the 128-bit __v1
   (cast to __v4si) and __scale.  Returns nothing.  */
6b62f323 11356extern __inline void
936c0fe4 11357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11358_mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11359 __m256i __index, __m128i __v1,
11360 const int __scale)
936c0fe4 11361{
6b62f323
JJ
11362 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
11363 (__v4si) __v1, __scale);
936c0fe4
AI
11364}
11365
/* Unmasked form: calls __builtin_ia32_scatterdiv4si with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v2di), __v1 (cast to
   __v4si) and __scale.  Returns nothing.  */
6b62f323 11366extern __inline void
936c0fe4 11367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11368_mm_i64scatter_epi32 (void *__addr, __m128i __index,
11369 __m128i __v1, const int __scale)
936c0fe4 11370{
6b62f323
JJ
11371 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
11372 (__v2di) __index, (__v4si) __v1,
11373 __scale);
936c0fe4
AI
11374}
11375
/* Masked form: calls __builtin_ia32_scatterdiv4si with __addr, the
   caller's __mask, __index (cast to __v2di), __v1 (cast to __v4si) and
   __scale.  Returns nothing.  */
6b62f323 11376extern __inline void
936c0fe4 11377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11378_mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11379 __m128i __index, __m128i __v1,
11380 const int __scale)
936c0fe4 11381{
6b62f323
JJ
11382 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
11383 (__v4si) __v1, __scale);
936c0fe4
AI
11384}
11385
/* Unmasked form: calls __builtin_ia32_scatterdiv4di with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v4di), __v1 (cast to
   __v4di) and __scale.  Returns nothing.  */
6b62f323 11386extern __inline void
936c0fe4 11387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11388_mm256_i64scatter_epi64 (void *__addr, __m256i __index,
11389 __m256i __v1, const int __scale)
936c0fe4 11390{
6b62f323
JJ
11391 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
11392 (__v4di) __index, (__v4di) __v1,
11393 __scale);
936c0fe4
AI
11394}
11395
/* Masked form: calls __builtin_ia32_scatterdiv4di with __addr, the
   caller's __mask, __index (cast to __v4di), __v1 (cast to __v4di) and
   __scale.  Returns nothing.  */
6b62f323 11396extern __inline void
936c0fe4 11397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11398_mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11399 __m256i __index, __m256i __v1,
11400 const int __scale)
936c0fe4 11401{
6b62f323
JJ
11402 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
11403 (__v4di) __v1, __scale);
936c0fe4
AI
11404}
11405
/* Unmasked form: calls __builtin_ia32_scatterdiv2di with __addr, the
   constant mask (__mmask8) 0xFF, __index (cast to __v2di), __v1 (cast to
   __v2di) and __scale.  Returns nothing.  */
6b62f323 11406extern __inline void
936c0fe4 11407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11408_mm_i64scatter_epi64 (void *__addr, __m128i __index,
11409 __m128i __v1, const int __scale)
936c0fe4 11410{
6b62f323
JJ
11411 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
11412 (__v2di) __index, (__v2di) __v1,
11413 __scale);
936c0fe4
AI
11414}
11415
/* Masked form: calls __builtin_ia32_scatterdiv2di with __addr, the
   caller's __mask, __index (cast to __v2di), __v1 (cast to __v2di) and
   __scale.  Returns nothing.  */
6b62f323 11416extern __inline void
936c0fe4 11417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11418_mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11419 __m128i __index, __m128i __v1,
11420 const int __scale)
936c0fe4 11421{
6b62f323
JJ
11422 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
11423 (__v2di) __v1, __scale);
936c0fe4
AI
11424}
11425
/* Merge-masked form: calls __builtin_ia32_pshufd256_mask with __A (cast
   to __v8si), the selector __mask, __W (cast to __v8si) as the
   pass-through operand and __U as the write mask.  */
11426extern __inline __m256i
11427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11428_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11429 _MM_PERM_ENUM __mask)
936c0fe4 11430{
6b62f323
JJ
11431 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11432 (__v8si) __W,
936c0fe4
AI
11433 (__mmask8) __U);
11434}
11435
/* Zero-masked form: calls __builtin_ia32_pshufd256_mask with __A (cast to
   __v8si), the selector __mask, the result of _mm256_setzero_si256 () as
   the third operand and __U as the write mask.  */
11436extern __inline __m256i
11437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11438_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
11439 _MM_PERM_ENUM __mask)
936c0fe4 11440{
6b62f323
JJ
11441 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11442 (__v8si)
936c0fe4
AI
11443 _mm256_setzero_si256 (),
11444 (__mmask8) __U);
11445}
11446
/* Merge-masked form: calls __builtin_ia32_pshufd128_mask with __A (cast
   to __v4si), the selector __mask, __W (cast to __v4si) as the
   pass-through operand and __U as the write mask.  */
6b62f323 11447extern __inline __m128i
936c0fe4 11448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11449_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11450 _MM_PERM_ENUM __mask)
936c0fe4 11451{
6b62f323
JJ
11452 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11453 (__v4si) __W,
936c0fe4
AI
11454 (__mmask8) __U);
11455}
11456
/* Zero-masked form: calls __builtin_ia32_pshufd128_mask with __A (cast to
   __v4si), the selector __mask, the result of _mm_setzero_si128 () as the
   third operand and __U as the write mask.  */
6b62f323 11457extern __inline __m128i
936c0fe4 11458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11459_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
11460 _MM_PERM_ENUM __mask)
936c0fe4 11461{
6b62f323
JJ
11462 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11463 (__v4si)
11464 _mm_setzero_si128 (),
936c0fe4
AI
11465 (__mmask8) __U);
11466}
11467
/* Unmasked form: calls __builtin_ia32_prold256_mask with __A (cast to
   __v8si), the count __B, a zero vector from _mm256_setzero_si256 () and
   the constant mask (__mmask8) -1.  */
6b62f323 11468extern __inline __m256i
936c0fe4 11469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11470_mm256_rol_epi32 (__m256i __A, const int __B)
936c0fe4 11471{
6b62f323
JJ
11472 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11473 (__v8si)
11474 _mm256_setzero_si256 (),
11475 (__mmask8) -1);
936c0fe4
AI
11476}
11477
/* Merge-masked form: calls __builtin_ia32_prold256_mask with __A (cast to
   __v8si), the count __B, __W (cast to __v8si) as the pass-through
   operand and __U as the write mask.  */
6b62f323 11478extern __inline __m256i
936c0fe4 11479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11480_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11481 const int __B)
936c0fe4 11482{
6b62f323
JJ
11483 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11484 (__v8si) __W,
11485 (__mmask8) __U);
936c0fe4
AI
11486}
11487
/* Zero-masked form: calls __builtin_ia32_prold256_mask with __A (cast to
   __v8si), the count __B, a zero vector from _mm256_setzero_si256 () and
   __U as the write mask.  */
6b62f323 11488extern __inline __m256i
936c0fe4 11489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11490_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11491{
6b62f323
JJ
11492 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11493 (__v8si)
11494 _mm256_setzero_si256 (),
11495 (__mmask8) __U);
936c0fe4
AI
11496}
11497
/* Unmasked form: calls __builtin_ia32_prold128_mask with __A (cast to
   __v4si), the count __B, a zero vector from _mm_setzero_si128 () and the
   constant mask (__mmask8) -1.  */
6b62f323 11498extern __inline __m128i
936c0fe4 11499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11500_mm_rol_epi32 (__m128i __A, const int __B)
936c0fe4 11501{
6b62f323
JJ
11502 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11503 (__v4si)
11504 _mm_setzero_si128 (),
11505 (__mmask8) -1);
936c0fe4
AI
11506}
11507
/* Merge-masked form: calls __builtin_ia32_prold128_mask with __A (cast to
   __v4si), the count __B, __W (cast to __v4si) as the pass-through
   operand and __U as the write mask.  */
6b62f323 11508extern __inline __m128i
936c0fe4 11509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11510_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11511 const int __B)
936c0fe4 11512{
6b62f323
JJ
11513 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11514 (__v4si) __W,
936c0fe4
AI
11515 (__mmask8) __U);
11516}
11517
/* Zero-masked form: calls __builtin_ia32_prold128_mask with __A (cast to
   __v4si), the count __B, a zero vector from _mm_setzero_si128 () and __U
   as the write mask.  */
6b62f323 11518extern __inline __m128i
936c0fe4 11519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11520_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11521{
6b62f323
JJ
11522 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11523 (__v4si)
11524 _mm_setzero_si128 (),
936c0fe4
AI
11525 (__mmask8) __U);
11526}
11527
/* Unmasked form: calls __builtin_ia32_prord256_mask with __A (cast to
   __v8si), the count __B, a zero vector from _mm256_setzero_si256 () and
   the constant mask (__mmask8) -1.  */
6b62f323 11528extern __inline __m256i
936c0fe4 11529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11530_mm256_ror_epi32 (__m256i __A, const int __B)
936c0fe4 11531{
6b62f323
JJ
11532 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11533 (__v8si)
11534 _mm256_setzero_si256 (),
11535 (__mmask8) -1);
936c0fe4
AI
11536}
11537
/* Merge-masked form: calls __builtin_ia32_prord256_mask with __A (cast to
   __v8si), the count __B, __W (cast to __v8si) as the pass-through
   operand and __U as the write mask.  */
6b62f323 11538extern __inline __m256i
936c0fe4 11539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11540_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11541 const int __B)
936c0fe4 11542{
6b62f323
JJ
11543 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11544 (__v8si) __W,
11545 (__mmask8) __U);
936c0fe4
AI
11546}
11547
/* Zero-masked form: calls __builtin_ia32_prord256_mask with __A (cast to
   __v8si), the count __B, a zero vector from _mm256_setzero_si256 () and
   __U as the write mask.  */
11548extern __inline __m256i
11549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11550_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11551{
6b62f323
JJ
11552 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11553 (__v8si)
11554 _mm256_setzero_si256 (),
11555 (__mmask8) __U);
936c0fe4
AI
11556}
11557
/* Unmasked form: calls __builtin_ia32_prord128_mask with __A (cast to
   __v4si), the count __B, a zero vector from _mm_setzero_si128 () and the
   constant mask (__mmask8) -1.  */
6b62f323 11558extern __inline __m128i
936c0fe4 11559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11560_mm_ror_epi32 (__m128i __A, const int __B)
936c0fe4 11561{
6b62f323
JJ
11562 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11563 (__v4si)
11564 _mm_setzero_si128 (),
11565 (__mmask8) -1);
936c0fe4
AI
11566}
11567
/* Merge-masked form: calls __builtin_ia32_prord128_mask with __A (cast to
   __v4si), the count __B, __W (cast to __v4si) as the pass-through
   operand and __U as the write mask.  */
6b62f323 11568extern __inline __m128i
936c0fe4 11569__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11570_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11571 const int __B)
936c0fe4 11572{
6b62f323
JJ
11573 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11574 (__v4si) __W,
11575 (__mmask8) __U);
936c0fe4
AI
11576}
11577
/* Zero-masked form: calls __builtin_ia32_prord128_mask with __A (cast to
   __v4si), the count __B, a zero vector from _mm_setzero_si128 () and __U
   as the write mask.  */
6b62f323 11578extern __inline __m128i
936c0fe4 11579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11580_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11581{
6b62f323
JJ
11582 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11583 (__v4si)
11584 _mm_setzero_si128 (),
11585 (__mmask8) __U);
936c0fe4
AI
11586}
11587
/* Unmasked form: calls __builtin_ia32_prolq256_mask with __A (cast to
   __v4di), the count __B, a zero vector from _mm256_setzero_si256 () and
   the constant mask (__mmask8) -1.  */
6b62f323 11588extern __inline __m256i
936c0fe4 11589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11590_mm256_rol_epi64 (__m256i __A, const int __B)
936c0fe4 11591{
6b62f323
JJ
11592 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11593 (__v4di)
11594 _mm256_setzero_si256 (),
11595 (__mmask8) -1);
936c0fe4
AI
11596}
11597
/* Merge-masked form: calls __builtin_ia32_prolq256_mask with __A (cast to
   __v4di), the count __B, __W (cast to __v4di) as the pass-through
   operand and __U as the write mask.  */
6b62f323 11598extern __inline __m256i
936c0fe4 11599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11600_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11601 const int __B)
936c0fe4 11602{
6b62f323
JJ
11603 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11604 (__v4di) __W,
11605 (__mmask8) __U);
936c0fe4
AI
11606}
11607
/* Zero-masked form: calls __builtin_ia32_prolq256_mask with __A (cast to
   __v4di), the count __B, a zero vector from _mm256_setzero_si256 () and
   __U as the write mask.  */
6b62f323 11608extern __inline __m256i
936c0fe4 11609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11610_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
11611{
11612 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11613 (__v4di)
11614 _mm256_setzero_si256 (),
11615 (__mmask8) __U);
936c0fe4
AI
11616}
11617
/* Unmasked form: calls __builtin_ia32_prolq128_mask with __A (cast to
   __v2di), the count __B, a zero vector from _mm_setzero_si128 () and the
   constant mask (__mmask8) -1.  */
6b62f323 11618extern __inline __m128i
936c0fe4 11619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11620_mm_rol_epi64 (__m128i __A, const int __B)
936c0fe4 11621{
6b62f323
JJ
11622 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11623 (__v2di)
11624 _mm_setzero_si128 (),
936c0fe4
AI
11625 (__mmask8) -1);
11626}
11627
/* Merge-masked form: calls __builtin_ia32_prolq128_mask with __A (cast to
   __v2di), the count __B, __W (cast to __v2di) as the pass-through
   operand and __U as the write mask.  */
6b62f323 11628extern __inline __m128i
936c0fe4 11629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11630_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11631 const int __B)
936c0fe4 11632{
6b62f323
JJ
11633 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11634 (__v2di) __W,
11635 (__mmask8) __U);
936c0fe4
AI
11636}
11637
/* Zero-masked form: calls __builtin_ia32_prolq128_mask with __A (cast to
   __v2di), the count __B, a zero vector from _mm_setzero_si128 () and __U
   as the write mask.  */
6b62f323 11638extern __inline __m128i
936c0fe4 11639__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11640_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11641{
6b62f323
JJ
11642 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11643 (__v2di)
11644 _mm_setzero_si128 (),
11645 (__mmask8) __U);
936c0fe4
AI
11646}
11647
/* Unmasked form: calls __builtin_ia32_prorq256_mask with __A (cast to
   __v4di), the count __B, a zero vector from _mm256_setzero_si256 () and
   the constant mask (__mmask8) -1.  */
6b62f323 11648extern __inline __m256i
936c0fe4 11649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11650_mm256_ror_epi64 (__m256i __A, const int __B)
936c0fe4 11651{
6b62f323
JJ
11652 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11653 (__v4di)
11654 _mm256_setzero_si256 (),
11655 (__mmask8) -1);
936c0fe4
AI
11656}
11657
/* Merge-masked form: calls __builtin_ia32_prorq256_mask with __A (cast to
   __v4di), the count __B, __W (cast to __v4di) as the pass-through
   operand and __U as the write mask.  */
6b62f323 11658extern __inline __m256i
936c0fe4 11659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11660_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11661 const int __B)
936c0fe4 11662{
6b62f323
JJ
11663 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11664 (__v4di) __W,
11665 (__mmask8) __U);
936c0fe4
AI
11666}
11667
/* Zero-masked form: calls __builtin_ia32_prorq256_mask with __A (cast to
   __v4di), the count __B, a zero vector from _mm256_setzero_si256 () and
   __U as the write mask.  */
6b62f323 11668extern __inline __m256i
936c0fe4 11669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11670_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
936c0fe4 11671{
6b62f323
JJ
11672 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11673 (__v4di)
11674 _mm256_setzero_si256 (),
936c0fe4
AI
11675 (__mmask8) __U);
11676}
11677
/* Unmasked form: calls __builtin_ia32_prorq128_mask with __A (cast to
   __v2di), the count __B, a zero vector from _mm_setzero_si128 () and the
   constant mask (__mmask8) -1.  */
6b62f323 11678extern __inline __m128i
936c0fe4 11679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11680_mm_ror_epi64 (__m128i __A, const int __B)
936c0fe4 11681{
6b62f323
JJ
11682 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11683 (__v2di)
11684 _mm_setzero_si128 (),
11685 (__mmask8) -1);
936c0fe4
AI
11686}
11687
/* Merge-masked form: calls __builtin_ia32_prorq128_mask with __A (cast to
   __v2di), the count __B, __W (cast to __v2di) as the pass-through
   operand and __U as the write mask.  */
6b62f323 11688extern __inline __m128i
936c0fe4 11689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11690_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11691 const int __B)
936c0fe4 11692{
6b62f323
JJ
11693 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11694 (__v2di) __W,
11695 (__mmask8) __U);
936c0fe4
AI
11696}
11697
/* Zero-masked form: calls __builtin_ia32_prorq128_mask with __A (cast to
   __v2di), the count __B, a zero vector from _mm_setzero_si128 () and __U
   as the write mask.  */
6b62f323 11698extern __inline __m128i
936c0fe4 11699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11700_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
936c0fe4 11701{
6b62f323
JJ
11702 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11703 (__v2di)
11704 _mm_setzero_si128 (),
11705 (__mmask8) __U);
936c0fe4
AI
11706}
11707
/* Unmasked form: calls __builtin_ia32_alignd128_mask with __A and __B
   (both cast to __v4si), the immediate __imm, a zero vector from
   _mm_setzero_si128 () and the constant mask (__mmask8) -1.  */
6b62f323 11708extern __inline __m128i
936c0fe4 11709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11710_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
936c0fe4 11711{
6b62f323
JJ
11712 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11713 (__v4si) __B, __imm,
11714 (__v4si)
11715 _mm_setzero_si128 (),
11716 (__mmask8) -1);
936c0fe4
AI
11717}
11718
/* Merge-masked form: calls __builtin_ia32_alignd128_mask with __A and __B
   (both cast to __v4si), the immediate __imm, __W (cast to __v4si) as the
   pass-through operand and __U as the write mask.  */
6b62f323 11719extern __inline __m128i
936c0fe4 11720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11721_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11722 __m128i __B, const int __imm)
936c0fe4 11723{
6b62f323
JJ
11724 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11725 (__v4si) __B, __imm,
11726 (__v4si) __W,
936c0fe4
AI
11727 (__mmask8) __U);
11728}
11729
/* Zero-masked form: calls __builtin_ia32_alignd128_mask with __A and __B
   (both cast to __v4si), the immediate __imm, a zero vector from
   _mm_setzero_si128 () and __U as the write mask.  */
6b62f323 11730extern __inline __m128i
936c0fe4 11731__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11732_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11733 const int __imm)
936c0fe4 11734{
6b62f323
JJ
11735 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11736 (__v4si) __B, __imm,
11737 (__v4si)
11738 _mm_setzero_si128 (),
11739 (__mmask8) __U);
936c0fe4
AI
11740}
11741
/* Unmasked form: calls __builtin_ia32_alignq128_mask with __A and __B
   (both cast to __v2di), the immediate __imm, a zero vector from
   _mm_setzero_si128 () and the constant mask (__mmask8) -1.  */
6b62f323 11742extern __inline __m128i
936c0fe4 11743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11744_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
936c0fe4 11745{
6b62f323
JJ
11746 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11747 (__v2di) __B, __imm,
11748 (__v2di)
11749 _mm_setzero_si128 (),
11750 (__mmask8) -1);
936c0fe4
AI
11751}
11752
/* Merge-masked form: calls __builtin_ia32_alignq128_mask with __A and __B
   (both cast to __v2di), the immediate __imm, __W (cast to __v2di) as the
   pass-through operand and __U as the write mask.  */
6b62f323 11753extern __inline __m128i
936c0fe4 11754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11755_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11756 __m128i __B, const int __imm)
936c0fe4 11757{
6b62f323
JJ
11758 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11759 (__v2di) __B, __imm,
11760 (__v2di) __W,
11761 (__mmask8) __U);
936c0fe4
AI
11762}
11763
/* Zero-masked form: calls __builtin_ia32_alignq128_mask with __A and __B
   (both cast to __v2di), the immediate __imm, a zero vector from
   _mm_setzero_si128 () and __U as the write mask.  */
6b62f323 11764extern __inline __m128i
936c0fe4 11765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11766_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11767 const int __imm)
936c0fe4 11768{
6b62f323
JJ
11769 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11770 (__v2di) __B, __imm,
11771 (__v2di)
11772 _mm_setzero_si128 (),
11773 (__mmask8) __U);
936c0fe4
AI
11774}
11775
/* Unmasked form: calls __builtin_ia32_alignd256_mask with __A and __B
   (both cast to __v8si), the immediate __imm, a zero vector from
   _mm256_setzero_si256 () and the constant mask (__mmask8) -1.  */
6b62f323 11776extern __inline __m256i
936c0fe4 11777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11778_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 11779{
6b62f323
JJ
11780 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11781 (__v8si) __B, __imm,
11782 (__v8si)
11783 _mm256_setzero_si256 (),
936c0fe4
AI
11784 (__mmask8) -1);
11785}
11786
/* Merge-masked form: calls __builtin_ia32_alignd256_mask with __A and __B
   (both cast to __v8si), the immediate __imm, __W (cast to __v8si) as the
   pass-through operand and __U as the write mask.  */
6b62f323 11787extern __inline __m256i
936c0fe4 11788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11789_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11790 __m256i __B, const int __imm)
936c0fe4 11791{
6b62f323
JJ
11792 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11793 (__v8si) __B, __imm,
11794 (__v8si) __W,
11795 (__mmask8) __U);
936c0fe4
AI
11796}
11797
/* Zero-masked form: calls __builtin_ia32_alignd256_mask with __A and __B
   (both cast to __v8si), the immediate __imm, a zero vector from
   _mm256_setzero_si256 () and __U as the write mask.  */
6b62f323 11798extern __inline __m256i
936c0fe4 11799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11800_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11801 const int __imm)
936c0fe4 11802{
6b62f323
JJ
11803 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11804 (__v8si) __B, __imm,
11805 (__v8si)
11806 _mm256_setzero_si256 (),
11807 (__mmask8) __U);
936c0fe4
AI
11808}
11809
/* Unmasked form: calls __builtin_ia32_alignq256_mask with __A and __B
   (both cast to __v4di), the immediate __imm, a zero vector from
   _mm256_setzero_si256 () and the constant mask (__mmask8) -1.  */
6b62f323 11810extern __inline __m256i
936c0fe4 11811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11812_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
936c0fe4 11813{
6b62f323
JJ
11814 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11815 (__v4di) __B, __imm,
11816 (__v4di)
11817 _mm256_setzero_si256 (),
11818 (__mmask8) -1);
936c0fe4
AI
11819}
11820
/* Merge-masked form: calls __builtin_ia32_alignq256_mask with __A and __B
   (both cast to __v4di), the immediate __imm, __W (cast to __v4di) as the
   pass-through operand and __U as the write mask.  */
6b62f323 11821extern __inline __m256i
936c0fe4 11822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11823_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11824 __m256i __B, const int __imm)
936c0fe4 11825{
6b62f323
JJ
11826 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11827 (__v4di) __B, __imm,
11828 (__v4di) __W,
936c0fe4
AI
11829 (__mmask8) __U);
11830}
11831
/* Zero-masked form: calls __builtin_ia32_alignq256_mask with __A and __B
   (both cast to __v4di), the immediate __imm, a zero vector from
   _mm256_setzero_si256 () and __U as the write mask.  */
6b62f323 11832extern __inline __m256i
936c0fe4 11833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11834_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11835 const int __imm)
936c0fe4 11836{
6b62f323
JJ
11837 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11838 (__v4di) __B, __imm,
11839 (__v4di)
11840 _mm256_setzero_si256 (),
936c0fe4
AI
11841 (__mmask8) __U);
11842}
11843
/* Merge-masked form: calls __builtin_ia32_vcvtps2ph_mask with __A (cast
   to __v4sf), the immediate __I, __W (cast to __v8hi) as the pass-through
   operand and __U as the write mask; result is cast to __m128i.  */
6b62f323 11844extern __inline __m128i
936c0fe4 11845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11846_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11847 const int __I)
936c0fe4 11848{
6b62f323
JJ
11849 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11850 (__v8hi) __W,
936c0fe4
AI
11851 (__mmask8) __U);
11852}
11853
6b62f323 11854extern __inline __m128i
936c0fe4 11855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323 11856_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
936c0fe4 11857{
6b62f323
JJ
11858 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11859 (__v8hi)
11860 _mm_setzero_si128 (),
936c0fe4
AI
11861 (__mmask8) __U);
11862}
11863
6b62f323 11864extern __inline __m128i
936c0fe4 11865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6b62f323
JJ
11866_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11867 const int __I)
936c0fe4 11868{
6b62f323
JJ
11869 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11870 (__v8hi) __W,
11871 (__mmask8) __U);
936c0fe4
AI
11872}
11873
6b62f323
JJ
11874extern __inline __m128i
11875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11876_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
eee5d6f5 11877{
6b62f323
JJ
11878 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11879 (__v8hi)
11880 _mm_setzero_si128 (),
11881 (__mmask8) __U);
eee5d6f5
AI
11882}
11883
6b62f323
JJ
11884extern __inline __m256i
11885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11886_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11887 const int __imm)
936c0fe4 11888{
6b62f323
JJ
11889 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11890 (__v8si) __W,
11891 (__mmask8) __U);
936c0fe4
AI
11892}
11893
6b62f323
JJ
11894extern __inline __m256i
11895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11896_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
eee5d6f5 11897{
6b62f323
JJ
11898 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11899 (__v8si)
11900 _mm256_setzero_si256 (),
11901 (__mmask8) __U);
eee5d6f5
AI
11902}
11903
6b62f323
JJ
11904extern __inline __m128i
11905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11906_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11907 const int __imm)
936c0fe4 11908{
6b62f323
JJ
11909 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11910 (__v4si) __W,
11911 (__mmask8) __U);
936c0fe4
AI
11912}
11913
6b62f323
JJ
11914extern __inline __m128i
11915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11916_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
eee5d6f5 11917{
6b62f323
JJ
11918 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11919 (__v4si)
11920 _mm_setzero_si128 (),
11921 (__mmask8) __U);
eee5d6f5
AI
11922}
11923
6b62f323
JJ
11924extern __inline __m256i
11925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11926_mm256_srai_epi64 (__m256i __A, const int __imm)
936c0fe4 11927{
6b62f323
JJ
11928 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11929 (__v4di)
11930 _mm256_setzero_si256 (),
c42b0bdf 11931 (__mmask8) -1);
936c0fe4
AI
11932}
11933
6b62f323
JJ
11934extern __inline __m256i
11935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11936_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11937 const int __imm)
936c0fe4 11938{
6b62f323
JJ
11939 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11940 (__v4di) __W,
11941 (__mmask8) __U);
936c0fe4
AI
11942}
11943
6b62f323
JJ
11944extern __inline __m256i
11945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11946_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
eee5d6f5 11947{
6b62f323
JJ
11948 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11949 (__v4di)
11950 _mm256_setzero_si256 (),
11951 (__mmask8) __U);
eee5d6f5
AI
11952}
11953
6b62f323
JJ
11954extern __inline __m128i
11955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11956_mm_srai_epi64 (__m128i __A, const int __imm)
936c0fe4 11957{
6b62f323
JJ
11958 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11959 (__v2di)
11960 _mm_setzero_si128 (),
c42b0bdf 11961 (__mmask8) -1);
936c0fe4
AI
11962}
11963
6b62f323
JJ
11964extern __inline __m128i
11965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11966_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11967 const int __imm)
936c0fe4 11968{
6b62f323
JJ
11969 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11970 (__v2di) __W,
11971 (__mmask8) __U);
936c0fe4
AI
11972}
11973
6b62f323
JJ
11974extern __inline __m128i
11975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11976_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
eee5d6f5 11977{
6b62f323
JJ
11978 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11979 (__v2di)
11980 _mm_setzero_si128 (),
11981 (__mmask8) __U);
eee5d6f5
AI
11982}
11983
6b62f323
JJ
11984extern __inline __m128i
11985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11986_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
936c0fe4 11987{
6b62f323
JJ
11988 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11989 (__v4si) __W,
11990 (__mmask8) __U);
936c0fe4
AI
11991}
11992
6b62f323
JJ
11993extern __inline __m128i
11994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11995_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
eee5d6f5 11996{
6b62f323
JJ
11997 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11998 (__v4si)
11999 _mm_setzero_si128 (),
12000 (__mmask8) __U);
eee5d6f5
AI
12001}
12002
6b62f323
JJ
12003extern __inline __m128i
12004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12005_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
936c0fe4 12006{
6b62f323
JJ
12007 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
12008 (__v2di) __W,
12009 (__mmask8) __U);
936c0fe4
AI
12010}
12011
6b62f323
JJ
12012extern __inline __m128i
12013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12014_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
eee5d6f5 12015{
6b62f323
JJ
12016 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
12017 (__v2di)
12018 _mm_setzero_si128 (),
12019 (__mmask8) __U);
eee5d6f5
AI
12020}
12021
6b62f323
JJ
12022extern __inline __m256i
12023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12024_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
12025 int __B)
936c0fe4 12026{
6b62f323
JJ
12027 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12028 (__v8si) __W,
12029 (__mmask8) __U);
936c0fe4
AI
12030}
12031
6b62f323
JJ
12032extern __inline __m256i
12033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12034_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
12035{
12036 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12037 (__v8si)
12038 _mm256_setzero_si256 (),
12039 (__mmask8) __U);
eee5d6f5
AI
12040}
12041
6b62f323
JJ
12042extern __inline __m256i
12043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12044_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
12045 int __B)
936c0fe4 12046{
6b62f323
JJ
12047 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12048 (__v4di) __W,
12049 (__mmask8) __U);
936c0fe4
AI
12050}
12051
6b62f323
JJ
12052extern __inline __m256i
12053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12054_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
eee5d6f5 12055{
6b62f323
JJ
12056 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12057 (__v4di)
12058 _mm256_setzero_si256 (),
12059 (__mmask8) __U);
eee5d6f5
AI
12060}
12061
6b62f323
JJ
12062extern __inline __m256d
12063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12064_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
12065 const int __imm)
936c0fe4 12066{
6b62f323
JJ
12067 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12068 (__v4df) __W,
12069 (__mmask8) __U);
936c0fe4
AI
12070}
12071
6b62f323
JJ
12072extern __inline __m256d
12073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12074_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
eee5d6f5 12075{
6b62f323
JJ
12076 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12077 (__v4df)
12078 _mm256_setzero_pd (),
12079 (__mmask8) __U);
eee5d6f5
AI
12080}
12081
6b62f323
JJ
12082extern __inline __m256d
12083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
12085 const int __C)
936c0fe4 12086{
6b62f323
JJ
12087 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12088 (__v4df) __W,
12089 (__mmask8) __U);
936c0fe4
AI
12090}
12091
6b62f323
JJ
12092extern __inline __m256d
12093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12094_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
eee5d6f5 12095{
6b62f323
JJ
12096 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12097 (__v4df)
12098 _mm256_setzero_pd (),
12099 (__mmask8) __U);
eee5d6f5
AI
12100}
12101
6b62f323
JJ
12102extern __inline __m128d
12103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12104_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
12105 const int __C)
936c0fe4 12106{
6b62f323
JJ
12107 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12108 (__v2df) __W,
12109 (__mmask8) __U);
936c0fe4
AI
12110}
12111
6b62f323
JJ
12112extern __inline __m128d
12113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12114_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
eee5d6f5 12115{
6b62f323
JJ
12116 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12117 (__v2df)
12118 _mm_setzero_pd (),
12119 (__mmask8) __U);
eee5d6f5
AI
12120}
12121
6b62f323
JJ
12122extern __inline __m256
12123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12124_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
12125 const int __C)
936c0fe4 12126{
6b62f323
JJ
12127 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12128 (__v8sf) __W,
12129 (__mmask8) __U);
936c0fe4
AI
12130}
12131
6b62f323
JJ
12132extern __inline __m256
12133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12134_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
eee5d6f5 12135{
6b62f323
JJ
12136 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12137 (__v8sf)
12138 _mm256_setzero_ps (),
12139 (__mmask8) __U);
eee5d6f5
AI
12140}
12141
6b62f323
JJ
12142extern __inline __m128
12143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12144_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
12145 const int __C)
936c0fe4 12146{
6b62f323
JJ
12147 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12148 (__v4sf) __W,
12149 (__mmask8) __U);
936c0fe4
AI
12150}
12151
6b62f323
JJ
12152extern __inline __m128
12153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
eee5d6f5 12155{
6b62f323
JJ
12156 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12157 (__v4sf)
12158 _mm_setzero_ps (),
12159 (__mmask8) __U);
eee5d6f5
AI
12160}
12161
6b62f323
JJ
12162extern __inline __m256d
12163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12164_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
936c0fe4 12165{
6b62f323
JJ
12166 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
12167 (__v4df) __W,
12168 (__mmask8) __U);
936c0fe4
AI
12169}
12170
6b62f323
JJ
12171extern __inline __m256
12172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12173_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
eee5d6f5 12174{
6b62f323
JJ
12175 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
12176 (__v8sf) __W,
12177 (__mmask8) __U);
eee5d6f5
AI
12178}
12179
6b62f323
JJ
12180extern __inline __m256i
12181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12182_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
936c0fe4 12183{
6b62f323
JJ
12184 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
12185 (__v4di) __W,
12186 (__mmask8) __U);
936c0fe4
AI
12187}
12188
6b62f323
JJ
12189extern __inline __m256i
12190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12191_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
eee5d6f5 12192{
6b62f323
JJ
12193 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
12194 (__v8si) __W,
12195 (__mmask8) __U);
eee5d6f5
AI
12196}
12197
6b62f323
JJ
12198extern __inline __m128d
12199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12200_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
936c0fe4 12201{
6b62f323
JJ
12202 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
12203 (__v2df) __W,
12204 (__mmask8) __U);
936c0fe4
AI
12205}
12206
6b62f323
JJ
12207extern __inline __m128
12208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12209_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
eee5d6f5 12210{
6b62f323
JJ
12211 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
12212 (__v4sf) __W,
12213 (__mmask8) __U);
eee5d6f5
AI
12214}
12215
6b62f323
JJ
12216extern __inline __m128i
12217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12218_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
936c0fe4 12219{
6b62f323
JJ
12220 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
12221 (__v2di) __W,
12222 (__mmask8) __U);
936c0fe4
AI
12223}
12224
6b62f323
JJ
12225extern __inline __m128i
12226__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12227_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
eee5d6f5 12228{
6b62f323
JJ
12229 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
12230 (__v4si) __W,
12231 (__mmask8) __U);
eee5d6f5
AI
12232}
12233
936c0fe4 12234extern __inline __mmask8
6b62f323
JJ
12235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12236_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
936c0fe4 12237{
6b62f323
JJ
12238 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12239 (__v4di) __Y, __P,
12240 (__mmask8) -1);
936c0fe4
AI
12241}
12242
eee5d6f5 12243extern __inline __mmask8
6b62f323
JJ
12244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12245_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
eee5d6f5 12246{
6b62f323
JJ
12247 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12248 (__v8si) __Y, __P,
12249 (__mmask8) -1);
eee5d6f5
AI
12250}
12251
936c0fe4 12252extern __inline __mmask8
6b62f323
JJ
12253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12254_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
936c0fe4 12255{
6b62f323
JJ
12256 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12257 (__v4di) __Y, __P,
c42b0bdf 12258 (__mmask8) -1);
936c0fe4
AI
12259}
12260
eee5d6f5 12261extern __inline __mmask8
6b62f323
JJ
12262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12263_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
eee5d6f5 12264{
6b62f323
JJ
12265 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12266 (__v8si) __Y, __P,
12267 (__mmask8) -1);
eee5d6f5
AI
12268}
12269
936c0fe4 12270extern __inline __mmask8
6b62f323
JJ
12271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12272_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
936c0fe4 12273{
6b62f323
JJ
12274 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12275 (__v4df) __Y, __P,
c42b0bdf 12276 (__mmask8) -1);
936c0fe4
AI
12277}
12278
eee5d6f5 12279extern __inline __mmask8
6b62f323
JJ
12280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12281_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
eee5d6f5 12282{
6b62f323
JJ
12283 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12284 (__v8sf) __Y, __P,
12285 (__mmask8) -1);
eee5d6f5
AI
12286}
12287
936c0fe4 12288extern __inline __mmask8
6b62f323
JJ
12289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12290_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12291 const int __P)
936c0fe4 12292{
6b62f323
JJ
12293 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12294 (__v4di) __Y, __P,
12295 (__mmask8) __U);
936c0fe4
AI
12296}
12297
eee5d6f5 12298extern __inline __mmask8
6b62f323
JJ
12299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12301 const int __P)
eee5d6f5 12302{
6b62f323
JJ
12303 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12304 (__v8si) __Y, __P,
12305 (__mmask8) __U);
eee5d6f5
AI
12306}
12307
936c0fe4 12308extern __inline __mmask8
6b62f323
JJ
12309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12310_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12311 const int __P)
936c0fe4 12312{
6b62f323
JJ
12313 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12314 (__v4di) __Y, __P,
12315 (__mmask8) __U);
936c0fe4
AI
12316}
12317
eee5d6f5 12318extern __inline __mmask8
6b62f323
JJ
12319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12320_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12321 const int __P)
eee5d6f5 12322{
6b62f323
JJ
12323 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12324 (__v8si) __Y, __P,
12325 (__mmask8) __U);
eee5d6f5
AI
12326}
12327
936c0fe4 12328extern __inline __mmask8
6b62f323
JJ
12329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12330_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
12331 const int __P)
936c0fe4 12332{
6b62f323
JJ
12333 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12334 (__v4df) __Y, __P,
12335 (__mmask8) __U);
936c0fe4
AI
12336}
12337
eee5d6f5 12338extern __inline __mmask8
6b62f323
JJ
12339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12340_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
12341 const int __P)
eee5d6f5 12342{
6b62f323
JJ
12343 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12344 (__v8sf) __Y, __P,
12345 (__mmask8) __U);
eee5d6f5
AI
12346}
12347
936c0fe4 12348extern __inline __mmask8
6b62f323
JJ
12349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12350_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
936c0fe4 12351{
6b62f323
JJ
12352 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12353 (__v2di) __Y, __P,
c42b0bdf 12354 (__mmask8) -1);
936c0fe4
AI
12355}
12356
eee5d6f5 12357extern __inline __mmask8
6b62f323
JJ
12358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12359_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
eee5d6f5
AI
12360{
12361 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
6b62f323
JJ
12362 (__v4si) __Y, __P,
12363 (__mmask8) -1);
eee5d6f5
AI
12364}
12365
936c0fe4 12366extern __inline __mmask8
6b62f323
JJ
12367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12368_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
936c0fe4 12369{
6b62f323
JJ
12370 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12371 (__v2di) __Y, __P,
12372 (__mmask8) -1);
936c0fe4
AI
12373}
12374
eee5d6f5 12375extern __inline __mmask8
6b62f323
JJ
12376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
eee5d6f5 12378{
6b62f323
JJ
12379 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12380 (__v4si) __Y, __P,
12381 (__mmask8) -1);
eee5d6f5
AI
12382}
12383
936c0fe4 12384extern __inline __mmask8
6b62f323
JJ
12385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12386_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
936c0fe4 12387{
6b62f323
JJ
12388 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12389 (__v2df) __Y, __P,
12390 (__mmask8) -1);
936c0fe4
AI
12391}
12392
eee5d6f5 12393extern __inline __mmask8
6b62f323
JJ
12394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12395_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
eee5d6f5 12396{
6b62f323
JJ
12397 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12398 (__v4sf) __Y, __P,
12399 (__mmask8) -1);
eee5d6f5
AI
12400}
12401
936c0fe4 12402extern __inline __mmask8
6b62f323
JJ
12403__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12404_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12405 const int __P)
936c0fe4
AI
12406{
12407 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
6b62f323
JJ
12408 (__v2di) __Y, __P,
12409 (__mmask8) __U);
936c0fe4
AI
12410}
12411
eee5d6f5 12412extern __inline __mmask8
6b62f323
JJ
12413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12414_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12415 const int __P)
eee5d6f5 12416{
6b62f323
JJ
12417 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12418 (__v4si) __Y, __P,
12419 (__mmask8) __U);
eee5d6f5
AI
12420}
12421
936c0fe4 12422extern __inline __mmask8
6b62f323
JJ
12423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12424_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12425 const int __P)
936c0fe4 12426{
6b62f323
JJ
12427 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12428 (__v2di) __Y, __P,
12429 (__mmask8) __U);
936c0fe4
AI
12430}
12431
eee5d6f5 12432extern __inline __mmask8
6b62f323
JJ
12433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12434_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12435 const int __P)
eee5d6f5 12436{
6b62f323
JJ
12437 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12438 (__v4si) __Y, __P,
12439 (__mmask8) __U);
eee5d6f5
AI
12440}
12441
936c0fe4 12442extern __inline __mmask8
6b62f323
JJ
12443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12444_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
12445 const int __P)
936c0fe4 12446{
6b62f323
JJ
12447 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12448 (__v2df) __Y, __P,
12449 (__mmask8) __U);
936c0fe4
AI
12450}
12451
eee5d6f5 12452extern __inline __mmask8
6b62f323
JJ
12453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12454_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
12455 const int __P)
eee5d6f5 12456{
6b62f323
JJ
12457 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12458 (__v4sf) __Y, __P,
12459 (__mmask8) __U);
eee5d6f5
AI
12460}
12461
6b62f323
JJ
12462extern __inline __m256d
12463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12464_mm256_permutex_pd (__m256d __X, const int __M)
936c0fe4 12465{
6b62f323
JJ
12466 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
12467 (__v4df)
12468 _mm256_undefined_pd (),
12469 (__mmask8) -1);
936c0fe4
AI
12470}
12471
12472#else
/* Macro form of _mm256_permutex_pd: expands to a call of
   __builtin_ia32_permdf256_mask on X with immediate M, the result of
   _mm256_undefined_pd () as the third operand and a mask of -1.  */
#define _mm256_permutex_pd(X, M) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df)(__m256d)(X), \
     (int)(M), \
     (__v4df)(__m256d) _mm256_undefined_pd (), \
     (__mmask8)-1))
12478
395a191d
SP
/* Expands to a call of __builtin_ia32_permdi256_mask on X with
   immediate I, the result of _mm256_setzero_si256 () as the third
   operand and a mask of -1.  */
#define _mm256_permutex_epi64(X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
     (__mmask8) -1))
12485
936c0fe4
AI
/* Expands to a call of __builtin_ia32_permdi256_mask on X with
   immediate I, the result of _mm256_setzero_si256 () as the third
   operand and M as the mask.  */
#define _mm256_maskz_permutex_epi64(M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
     (__mmask8)(M)))
12492
/* Expands to a call of __builtin_ia32_permdi256_mask on X with
   immediate I, W as the third operand and M as the mask.  */
#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
     (__v4di)(__m256i)(X), \
     (int)(I), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(M)))
12498
/* Expands to a call of __builtin_ia32_insertf32x4_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_ps () as the fourth
   operand and a mask of -1.  */
#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))
12504
/* Expands to a call of __builtin_ia32_insertf32x4_256_mask on X and Y
   with immediate C, W as the fourth operand and U as the mask.  */
#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))
12510
/* Expands to a call of __builtin_ia32_insertf32x4_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_ps () as the fourth
   operand and U as the mask.  */
#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), \
     (int) (C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))
12516
/* Expands to a call of __builtin_ia32_inserti32x4_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_si256 () as the fourth
   operand and a mask of -1.  */
#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))
12522
/* Expands to a call of __builtin_ia32_inserti32x4_256_mask on X and Y
   with immediate C, W as the fourth operand and U as the mask.  */
#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))
12528
/* Expands to a call of __builtin_ia32_inserti32x4_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_si256 () as the fourth
   operand and U as the mask.  */
#define _mm256_maskz_inserti32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (__v4si)(__m128i) (Y), \
     (int) (C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
12534
/* Expands to a call of __builtin_ia32_extractf32x4_256_mask on X with
   immediate C, the result of _mm_setzero_ps () as the third operand and
   a mask of -1.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)-1))
12540
/* Expands to a call of __builtin_ia32_extractf32x4_256_mask on X with
   immediate C, W as the third operand and U as the mask.  */
#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))
12546
/* Expands to a call of __builtin_ia32_extractf32x4_256_mask on X with
   immediate C, the result of _mm_setzero_ps () as the third operand and
   U as the mask.  */
#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
     (__v8sf)(__m256) (X), \
     (int) (C), \
     (__v4sf)(__m128)_mm_setzero_ps (), \
     (__mmask8)(U)))
12552
/* Expands to a call of __builtin_ia32_extracti32x4_256_mask on X with
   immediate C, the result of _mm_setzero_si128 () as the third operand
   and a mask of -1.  */
#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))
12556
/* Expands to a call of __builtin_ia32_extracti32x4_256_mask on X with
   immediate C, W as the third operand and U as the mask.  */
#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))
12560
/* Expands to a call of __builtin_ia32_extracti32x4_256_mask on X with
   immediate C, the result of _mm_setzero_si128 () as the third operand
   and U as the mask.  */
#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
     (__v8si)(__m256i) (X), \
     (int) (C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
12564
/* Expands to a call of __builtin_ia32_shuf_i64x2_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_si256 () as the fourth
   operand and a mask of -1.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))
12570
/* Expands to a call of __builtin_ia32_shuf_i64x2_256_mask on X and Y
   with immediate C, W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))
12576
/* Expands to a call of __builtin_ia32_shuf_i64x2_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_si256 () as the fourth
   operand and U as the mask.  */
#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
12582
/* Expands to a call of __builtin_ia32_shuf_i32x4_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_si256 () as the fourth
   operand and a mask of -1.  */
#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8)-1))
12589
/* Expands to a call of __builtin_ia32_shuf_i32x4_256_mask on X and Y
   with immediate C, W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))
12595
/* Expands to a call of __builtin_ia32_shuf_i32x4_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_si256 () as the fourth
   operand and U as the mask.  */
#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), \
     (__mmask8)(U)))
12602
/* Expands to a call of __builtin_ia32_shuf_f64x2_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_pd () as the fourth
   operand and a mask of -1.  */
#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)-1))
12608
/* Expands to a call of __builtin_ia32_shuf_f64x2_256_mask on X and Y
   with immediate C, W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))
12614
/* Expands to a call of __builtin_ia32_shuf_f64x2_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_pd () as the fourth
   operand and U as the mask.  */
#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd (), \
     (__mmask8)(U)))
12620
/* Expands to a call of __builtin_ia32_shuf_f32x4_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_ps () as the fourth
   operand and a mask of -1.  */
#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)-1))
12626
/* Expands to a call of __builtin_ia32_shuf_f32x4_256_mask on X and Y
   with immediate C, W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))
12632
/* Expands to a call of __builtin_ia32_shuf_f32x4_256_mask on X and Y
   with immediate C, the result of _mm256_setzero_ps () as the fourth
   operand and U as the mask.  */
#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))
12638
/* Expands to a call of __builtin_ia32_shufpd256_mask on A and B with
   immediate C, W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))
12644
/* Expands to a call of __builtin_ia32_shufpd256_mask on A and B with
   immediate C, the result of _mm256_setzero_pd () as the fourth operand
   and U as the mask.  */
#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), \
     (__v4df)(__m256d)(B), \
     (int)(C), \
     (__v4df)(__m256d) _mm256_setzero_pd (), \
     (__mmask8)(U)))
12651
/* Expands to a call of __builtin_ia32_shufpd128_mask on A and B with
   immediate C, W as the fourth operand and U as the mask.  */
#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))
12657
/* Expands to a call of __builtin_ia32_shufpd128_mask on A and B with
   immediate C, the result of _mm_setzero_pd () as the fourth operand and
   U as the mask.  */
#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), \
     (__v2df)(__m128d)(B), \
     (int)(C), \
     (__v2df)(__m128d)_mm_setzero_pd (), \
     (__mmask8)(U)))
12663
/* Expands to a call of __builtin_ia32_shufps256_mask on A and B with
   immediate C, W as the fourth operand and U as the mask.  */
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), \
     (__v8sf)(__m256)(B), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))
12669
/* Expands to a call of __builtin_ia32_shufps256_mask on A and B with
   immediate C, the result of _mm256_setzero_ps () as the fourth operand
   and U as the mask.  */
#define _mm256_maskz_shuffle_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), \
     (__v8sf)(__m256)(B), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), \
     (__mmask8)(U)))
12675
12676#define _mm_mask_shuffle_ps(W, U, A, B, C) \
12677 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12678 (__v4sf)(__m128)(B), (int)(C), \
12679 (__v4sf)(__m128)(W), \
12680 (__mmask8)(U)))
12681
12682#define _mm_maskz_shuffle_ps(U, A, B, C) \
12683 ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
12684 (__v4sf)(__m128)(B), (int)(C), \
a25a7887 12685 (__v4sf)(__m128)_mm_setzero_ps (), \
936c0fe4
AI
12686 (__mmask8)(U)))
12687
/* 256-bit fixupimm macros.  X and Y are passed as floating-point vectors,
   Z as an integer vector and C as an int immediate.  The unmasked and
   mask forms call the *_mask builtin (with (__mmask8)(-1) and
   (__mmask8)(U) respectively); the maskz forms call the *_maskz
   builtin.  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), (int)(C), (__mmask8)(-1)))

#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), (int)(C), (__mmask8)(U)))

#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz ( \
     (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), (int)(C), (__mmask8)(U)))

#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), (int)(C), (__mmask8)(-1)))


#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), (int)(C), (__mmask8)(U)))

#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz ( \
     (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), (int)(C), (__mmask8)(U)))
12724
/* 128-bit fixupimm macros.  X and Y are passed as floating-point vectors,
   Z as an integer vector and C as an int immediate.  The unmasked and
   mask forms call the *_mask builtin (with (__mmask8)(-1) and
   (__mmask8)(U) respectively); the maskz forms call the *_maskz
   builtin.  */
#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), (int)(C), (__mmask8)(-1)))


#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), (int)(C), (__mmask8)(U)))

#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), (int)(C), (__mmask8)(U)))

#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), (int)(C), (__mmask8)(-1)))

#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), (int)(C), (__mmask8)(U)))

#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), (int)(C), (__mmask8)(U)))
12761
/* Masked srli_epi32 macros.  A and the int count B are forwarded to
   __builtin_ia32_psrldi256_mask / __builtin_ia32_psrldi128_mask.  The
   mask forms pass W as the third operand; the maskz forms pass the value
   of the matching setzero_si call.  U is cast to __mmask8.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), (int)(B), \
     (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), (int)(B), \
     (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
     (__v4si)(__m128i)(A), (int)(B), \
     (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
     (__v4si)(__m128i)(A), (int)(B), \
     (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
936c0fe4
AI
12777
/* Masked srli_epi64 macros.  A and the int count B are forwarded to
   __builtin_ia32_psrlqi256_mask / __builtin_ia32_psrlqi128_mask.  The
   mask forms pass W as the third operand; the maskz forms pass the value
   of the matching setzero_si call.  U is cast to __mmask8.  */
#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
936c0fe4
AI
12793
/* 256-bit masked slli macros.  X and the int count C are forwarded to
   __builtin_ia32_pslldi256_mask (epi32) or __builtin_ia32_psllqi256_mask
   (epi64).  The mask forms pass W as the third operand; the maskz forms
   pass the value of _mm256_setzero_si256 ().  U is cast to __mmask8.  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i)__builtin_ia32_pslldi256_mask ( \
     (__v8si)(__m256i)(X), (int)(C), \
     (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i)__builtin_ia32_pslldi256_mask ( \
     (__v8si)(__m256i)(X), (int)(C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_psllqi256_mask ( \
     (__v4di)(__m256i)(X), (int)(C), \
     (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_psllqi256_mask ( \
     (__v4di)(__m256i)(X), (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
12813
/* 128-bit masked slli macros.  X and the int count C are forwarded to
   __builtin_ia32_pslldi128_mask (epi32) or __builtin_ia32_psllqi128_mask
   (epi64).  The mask forms pass W as the third operand; the maskz forms
   pass the value of _mm_setzero_si128 ().  U is cast to __mmask8.  */
#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i)__builtin_ia32_pslldi128_mask ( \
     (__v4si)(__m128i)(X), (int)(C), \
     (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i)__builtin_ia32_pslldi128_mask ( \
     (__v4si)(__m128i)(X), (int)(C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))

#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i)__builtin_ia32_psllqi128_mask ( \
     (__v2di)(__m128i)(X), (int)(C), \
     (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i)__builtin_ia32_psllqi128_mask ( \
     (__v2di)(__m128i)(X), (int)(C), \
     (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
12833
/* 256-bit ternarylogic macros.  A, B and C are the three vector operands
   and I is the int immediate.  The unmasked and mask forms call the
   pternlog*256_mask builtin with (__mmask8)-1 and (__mmask8)(U)
   respectively; the maskz forms call the pternlog*256_maskz builtin.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
     (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
     (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
     (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))

#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
     (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
     (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
     (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12857
/* 128-bit ternarylogic macros.  A, B and C are the three vector operands
   and I is the int immediate.  The unmasked and mask forms call the
   pternlog*128_mask builtin with (__mmask8)-1 and (__mmask8)(U)
   respectively; the maskz forms call the pternlog*128_maskz builtin.  */
#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
     (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
     (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
     (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
     (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
     (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
     (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
12881
/* 256-bit roundscale macros.  A and the int immediate B are forwarded to
   __builtin_ia32_rndscaleps_256_mask / __builtin_ia32_rndscalepd_256_mask.
   The unmasked and maskz forms pass the value of the matching setzero
   call as the third operand (with (__mmask8)-1 and (__mmask8)(U)
   respectively); the mask forms pass W and (__mmask8)(U).  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), (int)(B), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), (int)(B), \
     (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), (int)(B), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))

#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), (int)(B), \
     (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), (int)(B), \
     (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), (int)(B), \
     (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
936c0fe4
AI
12905
/* 128-bit roundscale macros.  A and the int immediate B are forwarded to
   __builtin_ia32_rndscaleps_128_mask / __builtin_ia32_rndscalepd_128_mask.
   The unmasked and maskz forms pass the value of the matching setzero
   call as the third operand (with (__mmask8)-1 and (__mmask8)(U)
   respectively); the mask forms pass W and (__mmask8)(U).  */
#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), (int)(B), \
     (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), (int)(B), \
     (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), (int)(B), \
     (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))

#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), (int)(B), \
     (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), (int)(B), \
     (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), (int)(B), \
     (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
936c0fe4
AI
12929
/* getmant_ps macros for 256-bit and 128-bit vectors.  The two selector
   arguments are packed into one int immediate as ((C) << 2) | (B) and
   passed with X to __builtin_ia32_getmantps256_mask /
   __builtin_ia32_getmantps128_mask.  The unmasked and maskz forms pass
   the value of the matching setzero_ps call as the third operand; the
   mask forms pass W.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))

#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
12965
/* getmant_pd macros for 256-bit and 128-bit vectors.  The two selector
   arguments are packed into one int immediate as ((C) << 2) | (B) and
   passed with X to __builtin_ia32_getmantpd256_mask /
   __builtin_ia32_getmantpd128_mask.  The unmasked and maskz forms pass
   the value of the matching setzero_pd call as the third operand; the
   mask forms pass W.  */
#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))

#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
13001
/* Masked gathers of float/double elements through 32-bit indices.  Each
   macro forwards its operands to the matching __builtin_ia32_gather3siv*
   builtin in the order (V1OLD, ADDR, INDEX, MASK, SCALE).

   Every macro parameter is parenthesized before it is cast, so that an
   expression argument such as `m1 | m2' or `base + off' is converted as
   a whole instead of only its first operand.  The complete expansion is
   parenthesized as well, matching the other function-like macros in this
   header.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ( \
     (__v8sf)(__m256)(V1OLD), (void const *)(ADDR), \
     (__v8si)(__m256i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ( \
     (__v4sf)(__m128)(V1OLD), (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ( \
     (__v4df)(__m256d)(V1OLD), (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ( \
     (__v2df)(__m128d)(V1OLD), (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))
13025
/* Masked gathers of float/double elements through 64-bit indices.  Each
   macro forwards its operands to the matching __builtin_ia32_gather3div*
   builtin in the order (V1OLD, ADDR, INDEX, MASK, SCALE).  Note that the
   256-bit-index float form takes and yields a __m128 value.

   Every macro parameter is parenthesized before it is cast, so that an
   expression argument such as `m1 | m2' or `base + off' is converted as
   a whole instead of only its first operand.  The complete expansion is
   parenthesized as well, matching the other function-like macros in this
   header.  */
#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ( \
     (__v4sf)(__m128)(V1OLD), (void const *)(ADDR), \
     (__v4di)(__m256i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ( \
     (__v4sf)(__m128)(V1OLD), (void const *)(ADDR), \
     (__v2di)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ( \
     (__v4df)(__m256d)(V1OLD), (void const *)(ADDR), \
     (__v4di)(__m256i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ( \
     (__v2df)(__m128d)(V1OLD), (void const *)(ADDR), \
     (__v2di)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))
13049
/* Masked gathers of 32-bit/64-bit integer elements through 32-bit
   indices.  Each macro forwards its operands to the matching
   __builtin_ia32_gather3siv* builtin in the order
   (V1OLD, ADDR, INDEX, MASK, SCALE).

   Every macro parameter is parenthesized before it is cast, so that an
   expression argument such as `m1 | m2' or `base + off' is converted as
   a whole instead of only its first operand.  The complete expansion is
   parenthesized as well, matching the other function-like macros in this
   header.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ( \
     (__v8si)(__m256i)(V1OLD), (void const *)(ADDR), \
     (__v8si)(__m256i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ( \
     (__v4si)(__m128i)(V1OLD), (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ( \
     (__v4di)(__m256i)(V1OLD), (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ( \
     (__v2di)(__m128i)(V1OLD), (void const *)(ADDR), \
     (__v4si)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))
13073
/* Masked gathers of 32-bit/64-bit integer elements through 64-bit
   indices.  Each macro forwards its operands to the matching
   __builtin_ia32_gather3div* builtin in the order
   (V1OLD, ADDR, INDEX, MASK, SCALE).  Note that the 256-bit-index epi32
   form takes and yields a __m128i value.

   Every macro parameter is parenthesized before it is cast, so that an
   expression argument such as `m1 | m2' or `base + off' is converted as
   a whole instead of only its first operand.  The complete expansion is
   parenthesized as well, matching the other function-like macros in this
   header.  */
#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ( \
     (__v4si)(__m128i)(V1OLD), (void const *)(ADDR), \
     (__v4di)(__m256i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ( \
     (__v4si)(__m128i)(V1OLD), (void const *)(ADDR), \
     (__v2di)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ( \
     (__v4di)(__m256i)(V1OLD), (void const *)(ADDR), \
     (__v4di)(__m256i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ( \
     (__v2di)(__m128i)(V1OLD), (void const *)(ADDR), \
     (__v2di)(__m128i)(INDEX), (__mmask8)(MASK), (int)(SCALE)))
13097
/* Scatters of float/double elements through 32-bit indices.  Each macro
   forwards to the matching __builtin_ia32_scattersiv* builtin in the
   order (ADDR, mask, INDEX, V1, SCALE).  The unmasked forms pass the
   constant (__mmask8)0xFF as the mask; the mask forms pass MASK.

   Every macro parameter is parenthesized before it is cast, so that an
   expression argument such as `m1 | m2' or `base + off' is converted as
   a whole instead of only its first operand.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v8si)(__m256i)(INDEX), \
    (__v8sf)(__m256)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v8si)(__m256i)(INDEX), \
    (__v8sf)(__m256)(V1), (int)(SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4si)(__m128i)(INDEX), \
    (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4si)(__m128i)(INDEX), \
    (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4si)(__m128i)(INDEX), \
    (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4si)(__m128i)(INDEX), \
    (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4si)(__m128i)(INDEX), \
    (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4si)(__m128i)(INDEX), \
    (__v2df)(__m128d)(V1), (int)(SCALE))
13137
/* Scatters of float/double elements through 64-bit indices.  Each macro
   forwards to the matching __builtin_ia32_scatterdiv* builtin in the
   order (ADDR, mask, INDEX, V1, SCALE).  The unmasked forms pass the
   constant (__mmask8)0xFF as the mask; the mask forms pass MASK.

   Every macro parameter is parenthesized before it is cast, so that an
   expression argument such as `m1 | m2' or `base + off' is converted as
   a whole instead of only its first operand.  */
#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4di)(__m256i)(INDEX), \
    (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4di)(__m256i)(INDEX), \
    (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v2di)(__m128i)(INDEX), \
    (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v2di)(__m128i)(INDEX), \
    (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4di)(__m256i)(INDEX), \
    (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4di)(__m256i)(INDEX), \
    (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v2di)(__m128i)(INDEX), \
    (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v2di)(__m128i)(INDEX), \
    (__v2df)(__m128d)(V1), (int)(SCALE))
13177
/* Scatters of 32-bit/64-bit integer elements through 32-bit indices.
   Each macro forwards to the matching __builtin_ia32_scattersiv* builtin
   in the order (ADDR, mask, INDEX, V1, SCALE).  The unmasked forms pass
   the constant (__mmask8)0xFF as the mask; the mask forms pass MASK.

   Every macro parameter is parenthesized before it is cast, so that an
   expression argument such as `m1 | m2' or `base + off' is converted as
   a whole instead of only its first operand.  */
#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v8si)(__m256i)(INDEX), \
    (__v8si)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v8si)(__m256i)(INDEX), \
    (__v8si)(__m256i)(V1), (int)(SCALE))

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4si)(__m128i)(INDEX), \
    (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4si)(__m128i)(INDEX), \
    (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4si)(__m128i)(INDEX), \
    (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4si)(__m128i)(INDEX), \
    (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4si)(__m128i)(INDEX), \
    (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4si)(__m128i)(INDEX), \
    (__v2di)(__m128i)(V1), (int)(SCALE))
13217
/* Scatters of 32-bit/64-bit integer elements through 64-bit indices.
   Each macro forwards to the matching __builtin_ia32_scatterdiv* builtin
   in the order (ADDR, mask, INDEX, V1, SCALE).  The unmasked forms pass
   the constant (__mmask8)0xFF as the mask; the mask forms pass MASK.

   Every macro parameter is parenthesized before it is cast, so that an
   expression argument such as `m1 | m2' or `base + off' is converted as
   a whole instead of only its first operand.  */
#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4di)(__m256i)(INDEX), \
    (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4di)(__m256i)(INDEX), \
    (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v2di)(__m128i)(INDEX), \
    (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v2di)(__m128i)(INDEX), \
    (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v4di)(__m256i)(INDEX), \
    (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v4di)(__m256i)(INDEX), \
    (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ( \
    (void *)(ADDR), (__mmask8)0xFF, (__v2di)(__m128i)(INDEX), \
    (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ( \
    (void *)(ADDR), (__mmask8)(MASK), (__v2di)(__m128i)(INDEX), \
    (__v2di)(__m128i)(V1), (int)(SCALE))
13257
/* Masked shuffle_epi32 macros.  X and the int immediate C are forwarded
   to __builtin_ia32_pshufd256_mask / __builtin_ia32_pshufd128_mask.  The
   mask forms pass W as the third operand; the maskz forms pass the value
   of the matching setzero_si call.  U is cast to __mmask8.  */
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si)(__m256i)(X), (int)(C), \
     (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si)(__m256i)(X), (int)(C), \
     (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si)(__m128i)(X), (int)(C), \
     (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si)(__m128i)(X), (int)(C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
13278
/* rol_epi64 macros for 256-bit and 128-bit vectors.  A and the int count
   B are forwarded to __builtin_ia32_prolq256_mask /
   __builtin_ia32_prolq128_mask.  The unmasked and maskz forms pass the
   value of the matching setzero_si call as the third operand (with
   (__mmask8)-1 and (__mmask8)(U) respectively); the mask forms pass W.  */
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
13308
/* ror_epi64 macros for 256-bit and 128-bit vectors.  A and the int count
   B are forwarded to __builtin_ia32_prorq256_mask /
   __builtin_ia32_prorq128_mask.  The unmasked and maskz forms pass the
   value of the matching setzero_si call as the third operand (with
   (__mmask8)-1 and (__mmask8)(U) respectively); the mask forms pass W.  */
#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
13338
/* rol_epi32 macros for 256-bit and 128-bit vectors.  A and the int count
   B are forwarded to __builtin_ia32_prold256_mask /
   __builtin_ia32_prold128_mask.  The unmasked and maskz forms pass the
   value of the matching setzero_si call as the third operand (with
   (__mmask8)-1 and (__mmask8)(U) respectively); the mask forms pass W.  */
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
     (__v8si)(__m256i)(A), (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
     (__v8si)(__m256i)(A), (int)(B), \
     (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
     (__v8si)(__m256i)(A), (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
     (__v4si)(__m128i)(A), (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
     (__v4si)(__m128i)(A), (int)(B), \
     (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
     (__v4si)(__m128i)(A), (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
13368
13369#define _mm256_ror_epi32(A, B) \
13370 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
a25a7887 13371 (__v8si)(__m256i)_mm256_setzero_si256 (),\
936c0fe4
AI
13372 (__mmask8)-1))
13373
13374#define _mm256_mask_ror_epi32(W, U, A, B) \
13375 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
13376 (__v8si)(__m256i)(W), \
13377 (__mmask8)(U)))
13378
13379#define _mm256_maskz_ror_epi32(U, A, B) \
13380 ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
a25a7887
JJ
13381 (__v8si)(__m256i) \
13382 _mm256_setzero_si256 (), \
936c0fe4
AI
13383 (__mmask8)(U)))
13384
13385#define _mm_ror_epi32(A, B) \
13386 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
a25a7887 13387 (__v4si)(__m128i)_mm_setzero_si128 (),\
936c0fe4
AI
13388 (__mmask8)-1))
13389
13390#define _mm_mask_ror_epi32(W, U, A, B) \
13391 ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
13392 (__v4si)(__m128i)(W), \
13393 (__mmask8)(U)))
13394
/* _mm_maskz_ror_epi32: hands A and the int operand B to
   __builtin_ia32_prord128_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))
13399
/* _mm256_alignr_epi32: hands X, Y and the int operand C to
   __builtin_ia32_alignd256_mask with an all-ones (__mmask8) -1 mask.
   The fourth (vector) argument is a zeroed vector rather than a second
   expansion of X, so X is evaluated exactly once.
   NOTE(review): relies on that argument being ignored when every mask
   bit is set, as the unmasked rol/ror macros in this file already
   assume.  */
#define _mm256_alignr_epi32(X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) -1))
13403
/* _mm256_mask_alignr_epi32: hands X, Y and the int operand C to
   __builtin_ia32_alignd256_mask, with W as the fourth (vector)
   argument and U as the mask.  */
#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(C), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))
13407
/* _mm256_maskz_alignr_epi32: hands X, Y and the int operand C to
   __builtin_ia32_alignd256_mask, with a zeroed vector as the fourth
   argument and U as the mask.  */
#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))
13412
/* _mm256_alignr_epi64: hands X, Y and the int operand C to
   __builtin_ia32_alignq256_mask with an all-ones (__mmask8) -1 mask.
   The fourth (vector) argument is a zeroed vector rather than a second
   expansion of X, so X is evaluated exactly once.
   NOTE(review): relies on that argument being ignored when every mask
   bit is set, as the unmasked _mm256_srai_epi64 macro in this file
   already assumes.  */
#define _mm256_alignr_epi64(X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(C), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) -1))
13416
/* _mm256_mask_alignr_epi64: hands X, Y and the int operand C to
   __builtin_ia32_alignq256_mask, with W as the fourth (vector)
   argument and U as the mask.  */
#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(C), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))
13420
/* _mm256_maskz_alignr_epi64: hands X, Y and the int operand C to
   __builtin_ia32_alignq256_mask, with a zeroed vector as the fourth
   argument and U as the mask.  */
#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(C), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8)(U)))
13425
/* _mm_alignr_epi32: hands X, Y and the int operand C to
   __builtin_ia32_alignd128_mask with an all-ones (__mmask8) -1 mask.
   The fourth (vector) argument is a zeroed vector rather than a second
   expansion of X, so X is evaluated exactly once.
   NOTE(review): relies on that argument being ignored when every mask
   bit is set, as the unmasked rol/ror macros in this file already
   assume.  */
#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(C), \
      (__v4si)(__m128i) _mm_setzero_si128 (), \
      (__mmask8) -1))
13429
/* _mm_mask_alignr_epi32: hands X, Y and the int operand C to
   __builtin_ia32_alignd128_mask, with W as the fourth (vector)
   argument and U as the mask.  */
#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(C), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))
13433
/* _mm_maskz_alignr_epi32: hands X, Y and the int operand C to
   __builtin_ia32_alignd128_mask, with a zeroed vector as the fourth
   argument and U as the mask.  */
#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(C), \
      (__v4si)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))
13438
/* _mm_alignr_epi64: hands X, Y and the int operand C to
   __builtin_ia32_alignq128_mask with an all-ones (__mmask8) -1 mask.
   The fourth (vector) argument is a zeroed vector rather than a second
   expansion of X, so X is evaluated exactly once.
   NOTE(review): relies on that argument being ignored when every mask
   bit is set, as the unmasked _mm_srai_epi64 macro in this file
   already assumes.  */
#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(C), \
      (__v2di)(__m128i) _mm_setzero_si128 (), \
      (__mmask8) -1))
13442
/* _mm_mask_alignr_epi64: hands X, Y and the int operand C to
   __builtin_ia32_alignq128_mask, with W as the fourth (vector)
   argument and U as the mask.  W and U must be forwarded here: passing
   X and an all-ones mask instead would make this masked form behave
   exactly like the unmasked _mm_alignr_epi64.  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(C), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))
13446
/* _mm_maskz_alignr_epi64: hands X, Y and the int operand C to
   __builtin_ia32_alignq128_mask, with a zeroed vector as the fourth
   argument and U as the mask.  */
#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(C), \
      (__v2di)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))
13451
/* _mm_mask_cvtps_ph: hands the __m128 value A and the int operand I to
   __builtin_ia32_vcvtps2ph_mask, with W as the third (vector) argument
   and U as the mask.  A is parenthesized so that the casts apply to the
   whole argument expression, not only to its first operand.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)))
13455
/* _mm_maskz_cvtps_ph: hands the __m128 value A and the int operand I
   to __builtin_ia32_vcvtps2ph_mask, with a zeroed vector as the third
   argument and U as the mask.  A is parenthesized so that the casts
   apply to the whole argument expression, not only to its first
   operand.  */
#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(I), \
      (__v8hi)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))
13459
/* _mm256_mask_cvtps_ph: hands the __m256 value A and the int operand I
   to __builtin_ia32_vcvtps2ph256_mask, with W as the third (vector)
   argument and U as the mask; the result is an __m128i.  A is
   parenthesized so that the casts apply to the whole argument
   expression, not only to its first operand.  */
#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)))
13463
/* _mm256_maskz_cvtps_ph: hands the __m256 value A and the int operand
   I to __builtin_ia32_vcvtps2ph256_mask, with a zeroed vector as the
   third argument and U as the mask; the result is an __m128i.  A is
   parenthesized so that the casts apply to the whole argument
   expression, not only to its first operand.  */
#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(I), \
      (__v8hi)(__m128i) _mm_setzero_si128 (), \
      (__mmask8)(U)))
13467
/* _mm256_mask_srai_epi32: hands A and the int operand B to
   __builtin_ia32_psradi256_mask, with W as the third (vector) argument
   and U as the mask.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))
13471
/* _mm256_maskz_srai_epi32: hands A and the int operand B to
   __builtin_ia32_psradi256_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(B), \
      (__v8si) _mm256_setzero_si256 (), \
      (__mmask8)(U)))
13475
/* _mm_mask_srai_epi32: hands A and the int operand B to
   __builtin_ia32_psradi128_mask, with W as the third (vector) argument
   and U as the mask.  */
#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))
13479
/* _mm_maskz_srai_epi32: hands A and the int operand B to
   __builtin_ia32_psradi128_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(B), \
      (__v4si) _mm_setzero_si128 (), \
      (__mmask8)(U)))
13483
/* _mm256_srai_epi64: hands A and the int operand B to
   __builtin_ia32_psraqi256_mask, with a zeroed vector as the third
   argument and an all-ones (__mmask8) -1 mask.  */
#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di) _mm256_setzero_si256 (), \
      (__mmask8) -1))
13487
/* _mm256_mask_srai_epi64: hands A and the int operand B to
   __builtin_ia32_psraqi256_mask, with W as the third (vector) argument
   and U as the mask.  */
#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))
13491
/* _mm256_maskz_srai_epi64: hands A and the int operand B to
   __builtin_ia32_psraqi256_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(B), \
      (__v4di) _mm256_setzero_si256 (), \
      (__mmask8)(U)))
13495
/* _mm_srai_epi64: hands A and the int operand B to
   __builtin_ia32_psraqi128_mask, with a zeroed vector as the third
   argument and an all-ones (__mmask8) -1 mask.  */
#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di) _mm_setzero_si128 (), \
      (__mmask8) -1))
13499
/* _mm_mask_srai_epi64: hands A and the int operand B to
   __builtin_ia32_psraqi128_mask, with W as the third (vector) argument
   and U as the mask.  */
#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))
13503
/* _mm_maskz_srai_epi64: hands A and the int operand B to
   __builtin_ia32_psraqi128_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(B), \
      (__v2di) _mm_setzero_si128 (), \
      (__mmask8)(U)))
13507
/* _mm256_mask_permutex_pd: hands A and the int operand B to
   __builtin_ia32_permdf256_mask, with W as the third (vector) argument
   and U as the mask.  */
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))
13511
/* _mm256_maskz_permutex_pd: hands A and the int operand B to
   __builtin_ia32_permdf256_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8)(U)))
13515
/* _mm256_mask_permute_pd: hands X and the int operand C to
   __builtin_ia32_vpermilpd256_mask, with W as the third (vector)
   argument and U as the mask.  */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
      (__v4df)(__m256d)(X), \
      (int)(C), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))
13520
/* _mm256_maskz_permute_pd: hands X and the int operand C to
   __builtin_ia32_vpermilpd256_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
      (__v4df)(__m256d)(X), \
      (int)(C), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8)(U)))
13525
/* _mm256_mask_permute_ps: hands X and the int operand C to
   __builtin_ia32_vpermilps256_mask, with W as the third (vector)
   argument and U as the mask.  */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
      (__v8sf)(__m256)(X), \
      (int)(C), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))
13529
/* _mm256_maskz_permute_ps: hands X and the int operand C to
   __builtin_ia32_vpermilps256_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
      (__v8sf)(__m256)(X), \
      (int)(C), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8)(U)))
13534
/* _mm_mask_permute_pd: hands X and the int operand C to
   __builtin_ia32_vpermilpd_mask, with W as the third (vector) argument
   and U as the mask.  */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
      (__v2df)(__m128d)(X), \
      (int)(C), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))
13538
/* _mm_maskz_permute_pd: hands X and the int operand C to
   __builtin_ia32_vpermilpd_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
      (__v2df)(__m128d)(X), \
      (int)(C), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8)(U)))
13543
/* _mm_mask_permute_ps: hands X and the int operand C to
   __builtin_ia32_vpermilps_mask, with W as the third (vector) argument
   and U as the mask.  */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
      (__v4sf)(__m128)(X), \
      (int)(C), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))
13547
/* _mm_maskz_permute_ps: hands X and the int operand C to
   __builtin_ia32_vpermilps_mask, with a zeroed vector as the third
   argument and U as the mask.  */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
      (__v4sf)(__m128)(X), \
      (int)(C), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8)(U)))
13552
/* _mm256_mask_blend_pd: passes __A and __W (both cast to __v4df) and
   the mask __U to __builtin_ia32_blendmpd_256_mask, in that order.  */
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
      (__v4df) (__A), \
      (__v4df) (__W), \
      (__mmask8) (__U)))
13557
/* _mm256_mask_blend_ps: passes __A and __W (both cast to __v8sf) and
   the mask __U to __builtin_ia32_blendmps_256_mask, in that order.  */
#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ( \
      (__v8sf) (__A), \
      (__v8sf) (__W), \
      (__mmask8) (__U)))
13562
/* _mm256_mask_blend_epi64: passes __A and __W (both cast to __v4di)
   and the mask __U to __builtin_ia32_blendmq_256_mask, in that
   order.  */
#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ( \
      (__v4di) (__A), \
      (__v4di) (__W), \
      (__mmask8) (__U)))
13567
/* _mm256_mask_blend_epi32: passes __A and __W (both cast to __v8si)
   and the mask __U to __builtin_ia32_blendmd_256_mask, in that
   order.  */
#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ( \
      (__v8si) (__A), \
      (__v8si) (__W), \
      (__mmask8) (__U)))
13572
/* _mm_mask_blend_pd: passes __A and __W (both cast to __v2df) and the
   mask __U to __builtin_ia32_blendmpd_128_mask, in that order.  */
#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
      (__v2df) (__A), \
      (__v2df) (__W), \
      (__mmask8) (__U)))
13577
/* _mm_mask_blend_ps: passes __A and __W (both cast to __v4sf) and the
   mask __U to __builtin_ia32_blendmps_128_mask, in that order.  */
#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ( \
      (__v4sf) (__A), \
      (__v4sf) (__W), \
      (__mmask8) (__U)))
13582
/* _mm_mask_blend_epi64: passes __A and __W (both cast to __v2di) and
   the mask __U to __builtin_ia32_blendmq_128_mask, in that order.  */
#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ( \
      (__v2di) (__A), \
      (__v2di) (__W), \
      (__mmask8) (__U)))
13587
/* _mm_mask_blend_epi32: passes __A and __W (both cast to __v4si) and
   the mask __U to __builtin_ia32_blendmd_128_mask, in that order.  */
#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ( \
      (__v4si) (__A), \
      (__v4si) (__W), \
      (__mmask8) (__U)))
13592
/* _mm256_cmp_epu32_mask: calls __builtin_ia32_ucmpd256_mask on X and Y
   (as __v8si) with the int operand P and an all-ones (__mmask8) -1
   mask; the result is cast to __mmask8.  */
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(P), \
      (__mmask8) -1))
13597
/* _mm256_cmp_epi64_mask: calls __builtin_ia32_cmpq256_mask on X and Y
   (as __v4di) with the int operand P and an all-ones (__mmask8) -1
   mask; the result is cast to __mmask8.  */
#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(P), \
      (__mmask8) -1))
13602
/* _mm256_cmp_epi32_mask: calls __builtin_ia32_cmpd256_mask on X and Y
   (as __v8si) with the int operand P and an all-ones (__mmask8) -1
   mask; the result is cast to __mmask8.  */
#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(P), \
      (__mmask8) -1))
13607
/* _mm256_cmp_epu64_mask: calls __builtin_ia32_ucmpq256_mask on X and Y
   (as __v4di) with the int operand P and an all-ones (__mmask8) -1
   mask; the result is cast to __mmask8.  */
#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(P), \
      (__mmask8) -1))
13612
/* _mm256_cmp_pd_mask: calls __builtin_ia32_cmppd256_mask on X and Y
   (as __v4df) with the int operand P and an all-ones (__mmask8) -1
   mask; the result is cast to __mmask8.  */
#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
      (__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), \
      (int)(P), \
      (__mmask8) -1))
13617
/* _mm256_cmp_ps_mask: calls __builtin_ia32_cmpps256_mask on X and Y
   (as __v8sf) with the int operand P and an all-ones (__mmask8) -1
   mask; the result is cast to __mmask8.  */
#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
      (__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), \
      (int)(P), \
      (__mmask8) -1))
13622
/* _mm256_mask_cmp_epi64_mask: calls __builtin_ia32_cmpq256_mask on X
   and Y (as __v4di) with the int operand P and the caller's mask M;
   the result is cast to __mmask8.  */
#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13627
/* _mm256_mask_cmp_epi32_mask: calls __builtin_ia32_cmpd256_mask on X
   and Y (as __v8si) with the int operand P and the caller's mask M;
   the result is cast to __mmask8.  */
#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13632
/* _mm256_mask_cmp_epu64_mask: calls __builtin_ia32_ucmpq256_mask on X
   and Y (as __v4di) with the int operand P and the caller's mask M;
   the result is cast to __mmask8.  */
#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13637
/* _mm256_mask_cmp_epu32_mask: calls __builtin_ia32_ucmpd256_mask on X
   and Y (as __v8si) with the int operand P and the caller's mask M;
   the result is cast to __mmask8.  */
#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13642
/* _mm256_mask_cmp_pd_mask: calls __builtin_ia32_cmppd256_mask on X and
   Y (as __v4df) with the int operand P and the caller's mask M; the
   result is cast to __mmask8.  */
#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
      (__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13647
/* _mm256_mask_cmp_ps_mask: calls __builtin_ia32_cmpps256_mask on X and
   Y (as __v8sf) with the int operand P and the caller's mask M; the
   result is cast to __mmask8.  */
#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
      (__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13652
/* _mm_cmp_epi64_mask: calls __builtin_ia32_cmpq128_mask on X and Y (as
   __v2di) with the int operand P and an all-ones (__mmask8) -1 mask;
   the result is cast to __mmask8.  */
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(P), \
      (__mmask8) -1))
13657
/* _mm_cmp_epi32_mask: calls __builtin_ia32_cmpd128_mask on X and Y (as
   __v4si) with the int operand P and an all-ones (__mmask8) -1 mask;
   the result is cast to __mmask8.  */
#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(P), \
      (__mmask8) -1))
13662
/* _mm_cmp_epu64_mask: calls __builtin_ia32_ucmpq128_mask on X and Y
   (as __v2di) with the int operand P and an all-ones (__mmask8) -1
   mask; the result is cast to __mmask8.  */
#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(P), \
      (__mmask8) -1))
13667
/* _mm_cmp_epu32_mask: calls __builtin_ia32_ucmpd128_mask on X and Y
   (as __v4si) with the int operand P and an all-ones (__mmask8) -1
   mask; the result is cast to __mmask8.  */
#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(P), \
      (__mmask8) -1))
13672
/* _mm_cmp_pd_mask: calls __builtin_ia32_cmppd128_mask on X and Y (as
   __v2df) with the int operand P and an all-ones (__mmask8) -1 mask;
   the result is cast to __mmask8.  */
#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8) -1))
13677
/* _mm_cmp_ps_mask: calls __builtin_ia32_cmpps128_mask on X and Y (as
   __v4sf) with the int operand P and an all-ones (__mmask8) -1 mask;
   the result is cast to __mmask8.  */
#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8) -1))
13682
/* _mm_mask_cmp_epi64_mask: calls __builtin_ia32_cmpq128_mask on X and
   Y (as __v2di) with the int operand P and the caller's mask M; the
   result is cast to __mmask8.  */
#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13687
/* _mm_mask_cmp_epi32_mask: calls __builtin_ia32_cmpd128_mask on X and
   Y (as __v4si) with the int operand P and the caller's mask M; the
   result is cast to __mmask8.  */
#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13692
/* _mm_mask_cmp_epu64_mask: calls __builtin_ia32_ucmpq128_mask on X and
   Y (as __v2di) with the int operand P and the caller's mask M; the
   result is cast to __mmask8.  */
#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13697
/* _mm_mask_cmp_epu32_mask: calls __builtin_ia32_ucmpd128_mask on X and
   Y (as __v4si) with the int operand P and the caller's mask M; the
   result is cast to __mmask8.  */
#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13702
/* _mm_mask_cmp_pd_mask: calls __builtin_ia32_cmppd128_mask on X and Y
   (as __v2df) with the int operand P and the caller's mask M; the
   result is cast to __mmask8.  */
#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13707
/* _mm_mask_cmp_ps_mask: calls __builtin_ia32_cmpps128_mask on X and Y
   (as __v4sf) with the int operand P and the caller's mask M; the
   result is cast to __mmask8.  */
#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)(M)))
13712
13713#endif
13714
/* _mm256_permutexvar_ps is an alias for _mm256_permutevar8x32_ps with
   the two operands swapped: A becomes the second argument and B the
   first.  */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps ((B), (A))
13716
13717#ifdef __DISABLE_AVX512VL__
13718#undef __DISABLE_AVX512VL__
13719#pragma GCC pop_options
13720#endif /* __DISABLE_AVX512VL__ */
13721
13722#endif /* _AVX512VLINTRIN_H_INCLUDED */