]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512vlintrin.h
avx512bwintrin.h: Whitespace fixes.
[thirdparty/gcc.git] / gcc / config / i386 / avx512vlintrin.h
CommitLineData
/* Copyright (C) 2014-2016 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLINTRIN_H_INCLUDED
29#define _AVX512VLINTRIN_H_INCLUDED
30
31/* Doesn't require avx512vl target and is used in avx512dqintrin.h. */
32extern __inline __m128i
33__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
34_mm_setzero_di (void)
35{
36 return __extension__ (__m128i)(__v2di){ 0LL, 0LL};
37}
38
39#ifndef __AVX512VL__
40#pragma GCC push_options
41#pragma GCC target("avx512vl")
42#define __DISABLE_AVX512VL__
43#endif /* __AVX512VL__ */
44
45/* Internal data types for implementing the intrinsics. */
/* Unsigned integer type backing __mmask32 mask values.  */
typedef unsigned __mmask32;
47
48extern __inline __m256d
49__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
50_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
51{
52 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
53 (__v4df) __W,
54 (__mmask8) __U);
55}
56
57extern __inline __m256d
58__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
59_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
60{
61 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
62 (__v4df)
63 _mm256_setzero_pd (),
64 (__mmask8) __U);
65}
66
67extern __inline __m128d
68__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
70{
71 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
72 (__v2df) __W,
73 (__mmask8) __U);
74}
75
76extern __inline __m128d
77__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
79{
80 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
81 (__v2df)
82 _mm_setzero_pd (),
83 (__mmask8) __U);
84}
85
86extern __inline __m256d
87__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
89{
90 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
91 (__v4df) __W,
92 (__mmask8) __U);
93}
94
95extern __inline __m256d
96__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
98{
99 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
100 (__v4df)
101 _mm256_setzero_pd (),
102 (__mmask8) __U);
103}
104
105extern __inline __m128d
106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
108{
109 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
110 (__v2df) __W,
111 (__mmask8) __U);
112}
113
114extern __inline __m128d
115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116_mm_maskz_load_pd (__mmask8 __U, void const *__P)
117{
118 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
119 (__v2df)
120 _mm_setzero_pd (),
121 (__mmask8) __U);
122}
123
124extern __inline void
125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
127{
128 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
129 (__v4df) __A,
130 (__mmask8) __U);
131}
132
133extern __inline void
134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
135_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
136{
137 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
138 (__v2df) __A,
139 (__mmask8) __U);
140}
141
142extern __inline __m256
143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
144_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
145{
146 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
147 (__v8sf) __W,
148 (__mmask8) __U);
149}
150
151extern __inline __m256
152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
153_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
154{
155 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
156 (__v8sf)
157 _mm256_setzero_ps (),
158 (__mmask8) __U);
159}
160
161extern __inline __m128
162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
164{
165 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
166 (__v4sf) __W,
167 (__mmask8) __U);
168}
169
170extern __inline __m128
171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
172_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
173{
174 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
175 (__v4sf)
176 _mm_setzero_ps (),
177 (__mmask8) __U);
178}
179
180extern __inline __m256
181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
183{
184 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
185 (__v8sf) __W,
186 (__mmask8) __U);
187}
188
189extern __inline __m256
190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
192{
193 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
194 (__v8sf)
195 _mm256_setzero_ps (),
196 (__mmask8) __U);
197}
198
199extern __inline __m128
200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
202{
203 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
204 (__v4sf) __W,
205 (__mmask8) __U);
206}
207
208extern __inline __m128
209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210_mm_maskz_load_ps (__mmask8 __U, void const *__P)
211{
212 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
213 (__v4sf)
214 _mm_setzero_ps (),
215 (__mmask8) __U);
216}
217
218extern __inline void
219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
221{
222 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
223 (__v8sf) __A,
224 (__mmask8) __U);
225}
226
227extern __inline void
228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
230{
231 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
232 (__v4sf) __A,
233 (__mmask8) __U);
234}
235
236extern __inline __m256i
237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
239{
240 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
241 (__v4di) __W,
242 (__mmask8) __U);
243}
244
245extern __inline __m256i
246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
248{
249 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
250 (__v4di)
251 _mm256_setzero_si256 (),
252 (__mmask8) __U);
253}
254
255extern __inline __m128i
256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
257_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
258{
259 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
260 (__v2di) __W,
261 (__mmask8) __U);
262}
263
264extern __inline __m128i
265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
267{
268 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
269 (__v2di)
270 _mm_setzero_di (),
271 (__mmask8) __U);
272}
273
274extern __inline __m256i
275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
277{
278 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
279 (__v4di) __W,
280 (__mmask8)
281 __U);
282}
283
284extern __inline __m256i
285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
286_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
287{
288 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
289 (__v4di)
290 _mm256_setzero_si256 (),
291 (__mmask8)
292 __U);
293}
294
295extern __inline __m128i
296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
297_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
298{
299 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
300 (__v2di) __W,
301 (__mmask8)
302 __U);
303}
304
305extern __inline __m128i
306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
307_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
308{
309 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
310 (__v2di)
311 _mm_setzero_di (),
312 (__mmask8)
313 __U);
314}
315
316extern __inline void
317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
319{
320 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
321 (__v4di) __A,
322 (__mmask8) __U);
323}
324
325extern __inline void
326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
328{
329 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
330 (__v2di) __A,
331 (__mmask8) __U);
332}
333
334extern __inline __m256i
335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
336_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
337{
338 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
339 (__v8si) __W,
340 (__mmask8) __U);
341}
342
343extern __inline __m256i
344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
346{
347 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
348 (__v8si)
349 _mm256_setzero_si256 (),
350 (__mmask8) __U);
351}
352
353extern __inline __m128i
354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
355_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
356{
357 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
358 (__v4si) __W,
359 (__mmask8) __U);
360}
361
362extern __inline __m128i
363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
364_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
365{
366 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
367 (__v4si)
368 _mm_setzero_si128 (),
369 (__mmask8) __U);
370}
371
372extern __inline __m256i
373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
374_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
375{
376 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
377 (__v8si) __W,
378 (__mmask8)
379 __U);
380}
381
382extern __inline __m256i
383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
385{
386 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
387 (__v8si)
388 _mm256_setzero_si256 (),
389 (__mmask8)
390 __U);
391}
392
393extern __inline __m128i
394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
396{
397 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
398 (__v4si) __W,
399 (__mmask8)
400 __U);
401}
402
403extern __inline __m128i
404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
406{
407 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
408 (__v4si)
409 _mm_setzero_si128 (),
410 (__mmask8)
411 __U);
412}
413
414extern __inline void
415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
416_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
417{
418 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
419 (__v8si) __A,
420 (__mmask8) __U);
421}
422
423extern __inline void
424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
425_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
426{
427 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
428 (__v4si) __A,
429 (__mmask8) __U);
430}
431
432extern __inline __m128i
433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
434_mm_setzero_hi (void)
435{
436 return __extension__ (__m128i) (__v8hi)
437 {
438 0, 0, 0, 0, 0, 0, 0, 0};
439}
440
441extern __inline __m128d
442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
443_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
444{
445 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
446 (__v2df) __B,
447 (__v2df) __W,
448 (__mmask8) __U);
449}
450
451extern __inline __m128d
452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
454{
455 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
456 (__v2df) __B,
457 (__v2df)
458 _mm_setzero_pd (),
459 (__mmask8) __U);
460}
461
462extern __inline __m256d
463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
464_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
465 __m256d __B)
466{
467 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
468 (__v4df) __B,
469 (__v4df) __W,
470 (__mmask8) __U);
471}
472
473extern __inline __m256d
474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
476{
477 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
478 (__v4df) __B,
479 (__v4df)
480 _mm256_setzero_pd (),
481 (__mmask8) __U);
482}
483
484extern __inline __m128
485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
487{
488 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
489 (__v4sf) __B,
490 (__v4sf) __W,
491 (__mmask8) __U);
492}
493
494extern __inline __m128
495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
496_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
497{
498 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
499 (__v4sf) __B,
500 (__v4sf)
501 _mm_setzero_ps (),
502 (__mmask8) __U);
503}
504
505extern __inline __m256
506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
507_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
508{
509 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
510 (__v8sf) __B,
511 (__v8sf) __W,
512 (__mmask8) __U);
513}
514
515extern __inline __m256
516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
517_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
518{
519 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
520 (__v8sf) __B,
521 (__v8sf)
522 _mm256_setzero_ps (),
523 (__mmask8) __U);
524}
525
526extern __inline __m128d
527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
529{
530 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
531 (__v2df) __B,
532 (__v2df) __W,
533 (__mmask8) __U);
534}
535
536extern __inline __m128d
537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
539{
540 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
541 (__v2df) __B,
542 (__v2df)
543 _mm_setzero_pd (),
544 (__mmask8) __U);
545}
546
547extern __inline __m256d
548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
549_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
550 __m256d __B)
551{
552 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
553 (__v4df) __B,
554 (__v4df) __W,
555 (__mmask8) __U);
556}
557
558extern __inline __m256d
559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
560_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
561{
562 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
563 (__v4df) __B,
564 (__v4df)
565 _mm256_setzero_pd (),
566 (__mmask8) __U);
567}
568
569extern __inline __m128
570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
571_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
572{
573 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
574 (__v4sf) __B,
575 (__v4sf) __W,
576 (__mmask8) __U);
577}
578
579extern __inline __m128
580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
582{
583 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
584 (__v4sf) __B,
585 (__v4sf)
586 _mm_setzero_ps (),
587 (__mmask8) __U);
588}
589
590extern __inline __m256
591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
593{
594 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
595 (__v8sf) __B,
596 (__v8sf) __W,
597 (__mmask8) __U);
598}
599
600extern __inline __m256
601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
603{
604 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
605 (__v8sf) __B,
606 (__v8sf)
607 _mm256_setzero_ps (),
608 (__mmask8) __U);
609}
610
611extern __inline void
612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613_mm256_store_epi64 (void *__P, __m256i __A)
614{
615 *(__m256i *) __P = __A;
616}
617
618extern __inline void
619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
620_mm_store_epi64 (void *__P, __m128i __A)
621{
622 *(__m128i *) __P = __A;
623}
624
625extern __inline __m256d
626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
628{
fc9cf6da 629 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
630 (__v4df) __W,
631 (__mmask8) __U);
632}
633
634extern __inline __m256d
635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
636_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
637{
fc9cf6da 638 return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
936c0fe4
AI
639 (__v4df)
640 _mm256_setzero_pd (),
641 (__mmask8) __U);
642}
643
644extern __inline __m128d
645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
647{
fc9cf6da 648 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
649 (__v2df) __W,
650 (__mmask8) __U);
651}
652
653extern __inline __m128d
654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
656{
fc9cf6da 657 return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
936c0fe4
AI
658 (__v2df)
659 _mm_setzero_pd (),
660 (__mmask8) __U);
661}
662
663extern __inline void
664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
665_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
666{
fc9cf6da 667 __builtin_ia32_storeupd256_mask ((double *) __P,
936c0fe4
AI
668 (__v4df) __A,
669 (__mmask8) __U);
670}
671
672extern __inline void
673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
674_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
675{
fc9cf6da 676 __builtin_ia32_storeupd128_mask ((double *) __P,
936c0fe4
AI
677 (__v2df) __A,
678 (__mmask8) __U);
679}
680
681extern __inline __m256
682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
684{
fc9cf6da 685 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
686 (__v8sf) __W,
687 (__mmask8) __U);
688}
689
690extern __inline __m256
691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
692_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
693{
fc9cf6da 694 return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
936c0fe4
AI
695 (__v8sf)
696 _mm256_setzero_ps (),
697 (__mmask8) __U);
698}
699
700extern __inline __m128
701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
702_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
703{
fc9cf6da 704 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
705 (__v4sf) __W,
706 (__mmask8) __U);
707}
708
709extern __inline __m128
710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
712{
fc9cf6da 713 return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
936c0fe4
AI
714 (__v4sf)
715 _mm_setzero_ps (),
716 (__mmask8) __U);
717}
718
719extern __inline void
720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
721_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
722{
fc9cf6da 723 __builtin_ia32_storeups256_mask ((float *) __P,
936c0fe4
AI
724 (__v8sf) __A,
725 (__mmask8) __U);
726}
727
728extern __inline void
729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
730_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
731{
fc9cf6da 732 __builtin_ia32_storeups128_mask ((float *) __P,
936c0fe4
AI
733 (__v4sf) __A,
734 (__mmask8) __U);
735}
736
737extern __inline __m256i
738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
739_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
740{
fc9cf6da 741 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
742 (__v4di) __W,
743 (__mmask8) __U);
744}
745
746extern __inline __m256i
747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
748_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
749{
fc9cf6da 750 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
936c0fe4
AI
751 (__v4di)
752 _mm256_setzero_si256 (),
753 (__mmask8) __U);
754}
755
756extern __inline __m128i
757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
758_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
759{
fc9cf6da 760 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4
AI
761 (__v2di) __W,
762 (__mmask8) __U);
763}
764
765extern __inline __m128i
766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
768{
fc9cf6da 769 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
936c0fe4
AI
770 (__v2di)
771 _mm_setzero_di (),
772 (__mmask8) __U);
773}
774
775extern __inline void
776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
778{
fc9cf6da 779 __builtin_ia32_storedqudi256_mask ((long long *) __P,
936c0fe4
AI
780 (__v4di) __A,
781 (__mmask8) __U);
782}
783
784extern __inline void
785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
787{
fc9cf6da 788 __builtin_ia32_storedqudi128_mask ((long long *) __P,
936c0fe4
AI
789 (__v2di) __A,
790 (__mmask8) __U);
791}
792
793extern __inline __m256i
794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
796{
fc9cf6da 797 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
798 (__v8si) __W,
799 (__mmask8) __U);
800}
801
802extern __inline __m256i
803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
805{
fc9cf6da 806 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
936c0fe4
AI
807 (__v8si)
808 _mm256_setzero_si256 (),
809 (__mmask8) __U);
810}
811
812extern __inline __m128i
813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
815{
fc9cf6da 816 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
817 (__v4si) __W,
818 (__mmask8) __U);
819}
820
821extern __inline __m128i
822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
824{
fc9cf6da 825 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
936c0fe4
AI
826 (__v4si)
827 _mm_setzero_si128 (),
828 (__mmask8) __U);
829}
830
831extern __inline void
832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
833_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
834{
fc9cf6da 835 __builtin_ia32_storedqusi256_mask ((int *) __P,
936c0fe4
AI
836 (__v8si) __A,
837 (__mmask8) __U);
838}
839
840extern __inline void
841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
842_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
843{
fc9cf6da 844 __builtin_ia32_storedqusi128_mask ((int *) __P,
936c0fe4
AI
845 (__v4si) __A,
846 (__mmask8) __U);
847}
848
849extern __inline __m256i
850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
851_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
852{
853 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
854 (__v8si) __W,
855 (__mmask8) __U);
856}
857
858extern __inline __m256i
859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
860_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
861{
862 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
863 (__v8si)
864 _mm256_setzero_si256 (),
865 (__mmask8) __U);
866}
867
868extern __inline __m128i
869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
870_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
871{
872 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
873 (__v4si) __W,
874 (__mmask8) __U);
875}
876
877extern __inline __m128i
878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
879_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
880{
881 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
882 (__v4si)
883 _mm_setzero_si128 (),
884 (__mmask8) __U);
885}
886
887extern __inline __m256i
888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889_mm256_abs_epi64 (__m256i __A)
890{
891 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
892 (__v4di)
893 _mm256_setzero_si256 (),
894 (__mmask8) -1);
895}
896
897extern __inline __m256i
898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
899_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
900{
901 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
902 (__v4di) __W,
903 (__mmask8) __U);
904}
905
906extern __inline __m256i
907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
908_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
909{
910 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
911 (__v4di)
912 _mm256_setzero_si256 (),
913 (__mmask8) __U);
914}
915
916extern __inline __m128i
917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
918_mm_abs_epi64 (__m128i __A)
919{
920 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
921 (__v2di)
922 _mm_setzero_di (),
923 (__mmask8) -1);
924}
925
926extern __inline __m128i
927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
928_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
929{
930 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
931 (__v2di) __W,
932 (__mmask8) __U);
933}
934
935extern __inline __m128i
936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
938{
939 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
940 (__v2di)
941 _mm_setzero_di (),
942 (__mmask8) __U);
943}
944
945extern __inline __m128i
946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947_mm256_cvtpd_epu32 (__m256d __A)
948{
949 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
950 (__v4si)
951 _mm_setzero_si128 (),
952 (__mmask8) -1);
953}
954
955extern __inline __m128i
956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
958{
959 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
960 (__v4si) __W,
961 (__mmask8) __U);
962}
963
964extern __inline __m128i
965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
966_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
967{
968 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
969 (__v4si)
970 _mm_setzero_si128 (),
971 (__mmask8) __U);
972}
973
974extern __inline __m128i
975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
976_mm_cvtpd_epu32 (__m128d __A)
977{
978 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
979 (__v4si)
980 _mm_setzero_si128 (),
981 (__mmask8) -1);
982}
983
984extern __inline __m128i
985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
986_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
987{
988 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
989 (__v4si) __W,
990 (__mmask8) __U);
991}
992
993extern __inline __m128i
994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
995_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
996{
997 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
998 (__v4si)
999 _mm_setzero_si128 (),
1000 (__mmask8) __U);
1001}
1002
1003extern __inline __m256i
1004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1005_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1006{
1007 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1008 (__v8si) __W,
1009 (__mmask8) __U);
1010}
1011
1012extern __inline __m256i
1013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1014_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1015{
1016 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1017 (__v8si)
1018 _mm256_setzero_si256 (),
1019 (__mmask8) __U);
1020}
1021
1022extern __inline __m128i
1023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1024_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1025{
1026 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1027 (__v4si) __W,
1028 (__mmask8) __U);
1029}
1030
1031extern __inline __m128i
1032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1033_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1034{
1035 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1036 (__v4si)
1037 _mm_setzero_si128 (),
1038 (__mmask8) __U);
1039}
1040
1041extern __inline __m256i
1042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1043_mm256_cvttps_epu32 (__m256 __A)
1044{
1045 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1046 (__v8si)
1047 _mm256_setzero_si256 (),
1048 (__mmask8) -1);
1049}
1050
1051extern __inline __m256i
1052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1053_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1054{
1055 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1056 (__v8si) __W,
1057 (__mmask8) __U);
1058}
1059
1060extern __inline __m256i
1061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1063{
1064 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1065 (__v8si)
1066 _mm256_setzero_si256 (),
1067 (__mmask8) __U);
1068}
1069
1070extern __inline __m128i
1071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072_mm_cvttps_epu32 (__m128 __A)
1073{
1074 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1075 (__v4si)
1076 _mm_setzero_si128 (),
1077 (__mmask8) -1);
1078}
1079
1080extern __inline __m128i
1081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1083{
1084 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1085 (__v4si) __W,
1086 (__mmask8) __U);
1087}
1088
1089extern __inline __m128i
1090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1091_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1092{
1093 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1094 (__v4si)
1095 _mm_setzero_si128 (),
1096 (__mmask8) __U);
1097}
1098
1099extern __inline __m128i
1100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1102{
1103 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1104 (__v4si) __W,
1105 (__mmask8) __U);
1106}
1107
1108extern __inline __m128i
1109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1110_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1111{
1112 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1113 (__v4si)
1114 _mm_setzero_si128 (),
1115 (__mmask8) __U);
1116}
1117
1118extern __inline __m128i
1119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1120_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1121{
1122 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1123 (__v4si) __W,
1124 (__mmask8) __U);
1125}
1126
1127extern __inline __m128i
1128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1130{
1131 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1132 (__v4si)
1133 _mm_setzero_si128 (),
1134 (__mmask8) __U);
1135}
1136
1137extern __inline __m128i
1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139_mm256_cvttpd_epu32 (__m256d __A)
1140{
1141 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1142 (__v4si)
1143 _mm_setzero_si128 (),
1144 (__mmask8) -1);
1145}
1146
1147extern __inline __m128i
1148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1150{
1151 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1152 (__v4si) __W,
1153 (__mmask8) __U);
1154}
1155
1156extern __inline __m128i
1157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1158_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1159{
1160 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1161 (__v4si)
1162 _mm_setzero_si128 (),
1163 (__mmask8) __U);
1164}
1165
1166extern __inline __m128i
1167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168_mm_cvttpd_epu32 (__m128d __A)
1169{
1170 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1171 (__v4si)
1172 _mm_setzero_si128 (),
1173 (__mmask8) -1);
1174}
1175
1176extern __inline __m128i
1177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1178_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1179{
1180 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1181 (__v4si) __W,
1182 (__mmask8) __U);
1183}
1184
1185extern __inline __m128i
1186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1187_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1188{
1189 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1190 (__v4si)
1191 _mm_setzero_si128 (),
1192 (__mmask8) __U);
1193}
1194
1195extern __inline __m128i
1196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1198{
1199 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1200 (__v4si) __W,
1201 (__mmask8) __U);
1202}
1203
1204extern __inline __m128i
1205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1206_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1207{
1208 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1209 (__v4si)
1210 _mm_setzero_si128 (),
1211 (__mmask8) __U);
1212}
1213
1214extern __inline __m128i
1215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1216_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1217{
1218 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1219 (__v4si) __W,
1220 (__mmask8) __U);
1221}
1222
1223extern __inline __m128i
1224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1225_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1226{
1227 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1228 (__v4si)
1229 _mm_setzero_si128 (),
1230 (__mmask8) __U);
1231}
1232
1233extern __inline __m256d
1234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1235_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1236{
1237 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1238 (__v4df) __W,
1239 (__mmask8) __U);
1240}
1241
1242extern __inline __m256d
1243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1244_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1245{
1246 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1247 (__v4df)
1248 _mm256_setzero_pd (),
1249 (__mmask8) __U);
1250}
1251
1252extern __inline __m128d
1253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1254_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1255{
1256 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1257 (__v2df) __W,
1258 (__mmask8) __U);
1259}
1260
1261extern __inline __m128d
1262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1264{
1265 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1266 (__v2df)
1267 _mm_setzero_pd (),
1268 (__mmask8) __U);
1269}
1270
1271extern __inline __m256d
1272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273_mm256_cvtepu32_pd (__m128i __A)
1274{
1275 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1276 (__v4df)
1277 _mm256_setzero_pd (),
1278 (__mmask8) -1);
1279}
1280
1281extern __inline __m256d
1282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1284{
1285 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1286 (__v4df) __W,
1287 (__mmask8) __U);
1288}
1289
1290extern __inline __m256d
1291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1292_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1293{
1294 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1295 (__v4df)
1296 _mm256_setzero_pd (),
1297 (__mmask8) __U);
1298}
1299
1300extern __inline __m128d
1301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1302_mm_cvtepu32_pd (__m128i __A)
1303{
1304 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1305 (__v2df)
1306 _mm_setzero_pd (),
1307 (__mmask8) -1);
1308}
1309
1310extern __inline __m128d
1311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1313{
1314 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1315 (__v2df) __W,
1316 (__mmask8) __U);
1317}
1318
1319extern __inline __m128d
1320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1321_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1322{
1323 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1324 (__v2df)
1325 _mm_setzero_pd (),
1326 (__mmask8) __U);
1327}
1328
1329extern __inline __m256
1330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1331_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1332{
1333 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1334 (__v8sf) __W,
1335 (__mmask8) __U);
1336}
1337
1338extern __inline __m256
1339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
1341{
1342 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1343 (__v8sf)
1344 _mm256_setzero_ps (),
1345 (__mmask8) __U);
1346}
1347
1348extern __inline __m128
1349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1351{
1352 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1353 (__v4sf) __W,
1354 (__mmask8) __U);
1355}
1356
1357extern __inline __m128
1358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1359_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
1360{
1361 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1362 (__v4sf)
1363 _mm_setzero_ps (),
1364 (__mmask8) __U);
1365}
1366
1367extern __inline __m256
1368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1369_mm256_cvtepu32_ps (__m256i __A)
1370{
1371 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1372 (__v8sf)
1373 _mm256_setzero_ps (),
1374 (__mmask8) -1);
1375}
1376
1377extern __inline __m256
1378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1379_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1380{
1381 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1382 (__v8sf) __W,
1383 (__mmask8) __U);
1384}
1385
1386extern __inline __m256
1387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1389{
1390 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1391 (__v8sf)
1392 _mm256_setzero_ps (),
1393 (__mmask8) __U);
1394}
1395
1396extern __inline __m128
1397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1398_mm_cvtepu32_ps (__m128i __A)
1399{
1400 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1401 (__v4sf)
1402 _mm_setzero_ps (),
1403 (__mmask8) -1);
1404}
1405
1406extern __inline __m128
1407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1408_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1409{
1410 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1411 (__v4sf) __W,
1412 (__mmask8) __U);
1413}
1414
1415extern __inline __m128
1416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1417_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1418{
1419 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1420 (__v4sf)
1421 _mm_setzero_ps (),
1422 (__mmask8) __U);
1423}
1424
1425extern __inline __m256d
1426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1427_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1428{
1429 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1430 (__v4df) __W,
1431 (__mmask8) __U);
1432}
1433
1434extern __inline __m256d
1435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1436_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1437{
1438 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1439 (__v4df)
1440 _mm256_setzero_pd (),
1441 (__mmask8) __U);
1442}
1443
1444extern __inline __m128d
1445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1446_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1447{
1448 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1449 (__v2df) __W,
1450 (__mmask8) __U);
1451}
1452
1453extern __inline __m128d
1454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1455_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1456{
1457 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1458 (__v2df)
1459 _mm_setzero_pd (),
1460 (__mmask8) __U);
1461}
1462
1463extern __inline __m128i
1464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465_mm_cvtepi32_epi8 (__m128i __A)
1466{
1467 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468 (__v16qi)_mm_undefined_si128(),
1469 (__mmask8) -1);
1470}
1471
1472extern __inline void
1473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1474_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1475{
1476 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1477}
1478
1479extern __inline __m128i
1480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1482{
1483 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1484 (__v16qi) __O, __M);
1485}
1486
1487extern __inline __m128i
1488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1489_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1490{
1491 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1492 (__v16qi)
1493 _mm_setzero_si128 (),
1494 __M);
1495}
1496
1497extern __inline __m128i
1498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499_mm256_cvtepi32_epi8 (__m256i __A)
1500{
1501 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1502 (__v16qi)_mm_undefined_si128(),
1503 (__mmask8) -1);
1504}
1505
1506extern __inline __m128i
1507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1509{
1510 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511 (__v16qi) __O, __M);
1512}
1513
1514extern __inline void
1515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1517{
1518 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1519}
1520
1521extern __inline __m128i
1522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1523_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1524{
1525 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1526 (__v16qi)
1527 _mm_setzero_si128 (),
1528 __M);
1529}
1530
1531extern __inline __m128i
1532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1533_mm_cvtsepi32_epi8 (__m128i __A)
1534{
1535 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1536 (__v16qi)_mm_undefined_si128(),
1537 (__mmask8) -1);
1538}
1539
1540extern __inline void
1541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1543{
1544 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1545}
1546
1547extern __inline __m128i
1548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1550{
1551 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1552 (__v16qi) __O, __M);
1553}
1554
1555extern __inline __m128i
1556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1557_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1558{
1559 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1560 (__v16qi)
1561 _mm_setzero_si128 (),
1562 __M);
1563}
1564
1565extern __inline __m128i
1566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1567_mm256_cvtsepi32_epi8 (__m256i __A)
1568{
1569 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1570 (__v16qi)_mm_undefined_si128(),
1571 (__mmask8) -1);
1572}
1573
1574extern __inline void
1575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1576_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1577{
1578 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1579}
1580
1581extern __inline __m128i
1582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1583_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1584{
1585 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1586 (__v16qi) __O, __M);
1587}
1588
1589extern __inline __m128i
1590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1592{
1593 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1594 (__v16qi)
1595 _mm_setzero_si128 (),
1596 __M);
1597}
1598
1599extern __inline __m128i
1600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601_mm_cvtusepi32_epi8 (__m128i __A)
1602{
1603 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1604 (__v16qi)_mm_undefined_si128(),
1605 (__mmask8) -1);
1606}
1607
1608extern __inline void
1609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1611{
1612 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1613}
1614
1615extern __inline __m128i
1616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1617_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1618{
1619 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1620 (__v16qi) __O,
1621 __M);
1622}
1623
1624extern __inline __m128i
1625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1627{
1628 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1629 (__v16qi)
1630 _mm_setzero_si128 (),
1631 __M);
1632}
1633
1634extern __inline __m128i
1635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1636_mm256_cvtusepi32_epi8 (__m256i __A)
1637{
1638 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1639 (__v16qi)_mm_undefined_si128(),
1640 (__mmask8) -1);
1641}
1642
1643extern __inline void
1644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1645_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1646{
1647 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1648}
1649
1650extern __inline __m128i
1651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1653{
1654 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1655 (__v16qi) __O,
1656 __M);
1657}
1658
1659extern __inline __m128i
1660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1661_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1662{
1663 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1664 (__v16qi)
1665 _mm_setzero_si128 (),
1666 __M);
1667}
1668
1669extern __inline __m128i
1670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1671_mm_cvtepi32_epi16 (__m128i __A)
1672{
1673 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1674 (__v8hi) _mm_setzero_si128 (),
1675 (__mmask8) -1);
1676}
1677
1678extern __inline void
1679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1680_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1681{
1682 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1683}
1684
1685extern __inline __m128i
1686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1688{
1689 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1690 (__v8hi) __O, __M);
1691}
1692
1693extern __inline __m128i
1694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1696{
1697 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1698 (__v8hi)
1699 _mm_setzero_si128 (),
1700 __M);
1701}
1702
1703extern __inline __m128i
1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705_mm256_cvtepi32_epi16 (__m256i __A)
1706{
1707 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1708 (__v8hi)_mm_setzero_si128 (),
1709 (__mmask8) -1);
1710}
1711
9ab4c07a 1712extern __inline void
936c0fe4
AI
1713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1714_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1715{
1716 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1717}
1718
1719extern __inline __m128i
1720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1721_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1722{
1723 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1724 (__v8hi) __O, __M);
1725}
1726
1727extern __inline __m128i
1728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1729_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1730{
1731 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1732 (__v8hi)
1733 _mm_setzero_si128 (),
1734 __M);
1735}
1736
1737extern __inline __m128i
1738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739_mm_cvtsepi32_epi16 (__m128i __A)
1740{
1741 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1742 (__v8hi)_mm_setzero_si128 (),
1743 (__mmask8) -1);
1744}
1745
1746extern __inline void
1747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1748_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1749{
1750 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1751}
1752
1753extern __inline __m128i
1754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1755_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1756{
1757 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1758 (__v8hi)__O,
1759 __M);
1760}
1761
1762extern __inline __m128i
1763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1764_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1765{
1766 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1767 (__v8hi)
1768 _mm_setzero_si128 (),
1769 __M);
1770}
1771
1772extern __inline __m128i
1773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1774_mm256_cvtsepi32_epi16 (__m256i __A)
1775{
1776 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1777 (__v8hi)_mm_undefined_si128(),
1778 (__mmask8) -1);
1779}
1780
1781extern __inline void
1782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1784{
1785 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1786}
1787
1788extern __inline __m128i
1789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1790_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1791{
1792 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1793 (__v8hi) __O, __M);
1794}
1795
1796extern __inline __m128i
1797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1799{
1800 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1801 (__v8hi)
1802 _mm_setzero_si128 (),
1803 __M);
1804}
1805
1806extern __inline __m128i
1807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1808_mm_cvtusepi32_epi16 (__m128i __A)
1809{
1810 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1811 (__v8hi)_mm_undefined_si128(),
1812 (__mmask8) -1);
1813}
1814
9ab4c07a 1815extern __inline void
936c0fe4
AI
1816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1817_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1818{
1819 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1820}
1821
1822extern __inline __m128i
1823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1824_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1825{
1826 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1827 (__v8hi) __O, __M);
1828}
1829
1830extern __inline __m128i
1831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1832_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1833{
1834 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1835 (__v8hi)
1836 _mm_setzero_si128 (),
1837 __M);
1838}
1839
1840extern __inline __m128i
1841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1842_mm256_cvtusepi32_epi16 (__m256i __A)
1843{
1844 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1845 (__v8hi)_mm_undefined_si128(),
1846 (__mmask8) -1);
1847}
1848
1849extern __inline void
1850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1851_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1852{
1853 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1854}
1855
1856extern __inline __m128i
1857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1859{
1860 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1861 (__v8hi) __O, __M);
1862}
1863
1864extern __inline __m128i
1865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1866_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1867{
1868 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1869 (__v8hi)
1870 _mm_setzero_si128 (),
1871 __M);
1872}
1873
1874extern __inline __m128i
1875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876_mm_cvtepi64_epi8 (__m128i __A)
1877{
1878 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1879 (__v16qi)_mm_undefined_si128(),
1880 (__mmask8) -1);
1881}
1882
1883extern __inline void
1884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1886{
1887 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1888}
1889
1890extern __inline __m128i
1891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1893{
1894 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1895 (__v16qi) __O, __M);
1896}
1897
1898extern __inline __m128i
1899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1900_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1901{
1902 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1903 (__v16qi)
1904 _mm_setzero_si128 (),
1905 __M);
1906}
1907
1908extern __inline __m128i
1909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1910_mm256_cvtepi64_epi8 (__m256i __A)
1911{
1912 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1913 (__v16qi)_mm_undefined_si128(),
1914 (__mmask8) -1);
1915}
1916
1917extern __inline void
1918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1920{
1921 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1922}
1923
1924extern __inline __m128i
1925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1926_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1927{
1928 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1929 (__v16qi) __O, __M);
1930}
1931
1932extern __inline __m128i
1933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1935{
1936 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1937 (__v16qi)
1938 _mm_setzero_si128 (),
1939 __M);
1940}
1941
1942extern __inline __m128i
1943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1944_mm_cvtsepi64_epi8 (__m128i __A)
1945{
1946 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1947 (__v16qi)_mm_undefined_si128(),
1948 (__mmask8) -1);
1949}
1950
1951extern __inline void
1952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1953_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1954{
1955 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1956}
1957
1958extern __inline __m128i
1959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1961{
1962 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1963 (__v16qi) __O, __M);
1964}
1965
1966extern __inline __m128i
1967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1969{
1970 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1971 (__v16qi)
1972 _mm_setzero_si128 (),
1973 __M);
1974}
1975
1976extern __inline __m128i
1977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978_mm256_cvtsepi64_epi8 (__m256i __A)
1979{
1980 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1981 (__v16qi)_mm_undefined_si128(),
1982 (__mmask8) -1);
1983}
1984
1985extern __inline void
1986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1987_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1988{
1989 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1990}
1991
1992extern __inline __m128i
1993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1995{
1996 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1997 (__v16qi) __O, __M);
1998}
1999
2000extern __inline __m128i
2001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2002_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2003{
2004 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2005 (__v16qi)
2006 _mm_setzero_si128 (),
2007 __M);
2008}
2009
2010extern __inline __m128i
2011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2012_mm_cvtusepi64_epi8 (__m128i __A)
2013{
2014 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2015 (__v16qi)_mm_undefined_si128(),
2016 (__mmask8) -1);
2017}
2018
2019extern __inline void
2020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2022{
2023 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2024}
2025
2026extern __inline __m128i
2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2029{
2030 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031 (__v16qi) __O,
2032 __M);
2033}
2034
2035extern __inline __m128i
2036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2038{
2039 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040 (__v16qi)
2041 _mm_setzero_si128 (),
2042 __M);
2043}
2044
2045extern __inline __m128i
2046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047_mm256_cvtusepi64_epi8 (__m256i __A)
2048{
2049 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2050 (__v16qi)_mm_undefined_si128(),
2051 (__mmask8) -1);
2052}
2053
2054extern __inline void
2055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2056_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2057{
2058 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2059}
2060
2061extern __inline __m128i
2062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2064{
2065 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2066 (__v16qi) __O,
2067 __M);
2068}
2069
2070extern __inline __m128i
2071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2073{
2074 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2075 (__v16qi)
2076 _mm_setzero_si128 (),
2077 __M);
2078}
2079
2080extern __inline __m128i
2081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082_mm_cvtepi64_epi16 (__m128i __A)
2083{
2084 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2085 (__v8hi)_mm_undefined_si128(),
2086 (__mmask8) -1);
2087}
2088
2089extern __inline void
2090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2091_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2092{
2093 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2094}
2095
2096extern __inline __m128i
2097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2099{
2100 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2101 (__v8hi)__O,
2102 __M);
2103}
2104
2105extern __inline __m128i
2106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2107_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2108{
2109 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2110 (__v8hi)
2111 _mm_setzero_si128 (),
2112 __M);
2113}
2114
2115extern __inline __m128i
2116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2117_mm256_cvtepi64_epi16 (__m256i __A)
2118{
2119 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2120 (__v8hi)_mm_undefined_si128(),
2121 (__mmask8) -1);
2122}
2123
2124extern __inline void
2125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2126_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2127{
2128 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2129}
2130
2131extern __inline __m128i
2132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2133_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2134{
2135 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2136 (__v8hi) __O, __M);
2137}
2138
2139extern __inline __m128i
2140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2141_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2142{
2143 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2144 (__v8hi)
2145 _mm_setzero_si128 (),
2146 __M);
2147}
2148
2149extern __inline __m128i
2150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2151_mm_cvtsepi64_epi16 (__m128i __A)
2152{
2153 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2154 (__v8hi)_mm_undefined_si128(),
2155 (__mmask8) -1);
2156}
2157
2158extern __inline void
2159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2161{
2162 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2163}
2164
2165extern __inline __m128i
2166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2167_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2168{
2169 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2170 (__v8hi) __O, __M);
2171}
2172
2173extern __inline __m128i
2174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2175_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2176{
2177 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2178 (__v8hi)
2179 _mm_setzero_si128 (),
2180 __M);
2181}
2182
2183extern __inline __m128i
2184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185_mm256_cvtsepi64_epi16 (__m256i __A)
2186{
2187 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2188 (__v8hi)_mm_undefined_si128(),
2189 (__mmask8) -1);
2190}
2191
2192extern __inline void
2193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2195{
2196 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2197}
2198
2199extern __inline __m128i
2200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2201_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2202{
2203 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2204 (__v8hi) __O, __M);
2205}
2206
2207extern __inline __m128i
2208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2209_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2210{
2211 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2212 (__v8hi)
2213 _mm_setzero_si128 (),
2214 __M);
2215}
2216
2217extern __inline __m128i
2218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2219_mm_cvtusepi64_epi16 (__m128i __A)
2220{
2221 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2222 (__v8hi)_mm_undefined_si128(),
2223 (__mmask8) -1);
2224}
2225
2226extern __inline void
2227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2228_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2229{
2230 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2231}
2232
2233extern __inline __m128i
2234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2236{
2237 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2238 (__v8hi) __O, __M);
2239}
2240
2241extern __inline __m128i
2242__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2243_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2244{
2245 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2246 (__v8hi)
2247 _mm_setzero_si128 (),
2248 __M);
2249}
2250
2251extern __inline __m128i
2252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2253_mm256_cvtusepi64_epi16 (__m256i __A)
2254{
2255 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2256 (__v8hi)_mm_undefined_si128(),
2257 (__mmask8) -1);
2258}
2259
2260extern __inline void
2261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2263{
2264 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2265}
2266
2267extern __inline __m128i
2268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2270{
2271 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2272 (__v8hi) __O, __M);
2273}
2274
2275extern __inline __m128i
2276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2277_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2278{
2279 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2280 (__v8hi)
2281 _mm_setzero_si128 (),
2282 __M);
2283}
2284
2285extern __inline __m128i
2286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287_mm_cvtepi64_epi32 (__m128i __A)
2288{
2289 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2290 (__v4si)_mm_undefined_si128(),
2291 (__mmask8) -1);
2292}
2293
2294extern __inline void
2295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2297{
2298 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2299}
2300
2301extern __inline __m128i
2302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2303_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2304{
2305 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2306 (__v4si) __O, __M);
2307}
2308
2309extern __inline __m128i
2310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2312{
2313 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314 (__v4si)
2315 _mm_setzero_si128 (),
2316 __M);
2317}
2318
2319extern __inline __m128i
2320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2321_mm256_cvtepi64_epi32 (__m256i __A)
2322{
2323 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2324 (__v4si)_mm_undefined_si128(),
2325 (__mmask8) -1);
2326}
2327
2328extern __inline void
2329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2330_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2331{
2332 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2333}
2334
2335extern __inline __m128i
2336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2337_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2338{
2339 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2340 (__v4si) __O, __M);
2341}
2342
2343extern __inline __m128i
2344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2346{
2347 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2348 (__v4si)
2349 _mm_setzero_si128 (),
2350 __M);
2351}
2352
2353extern __inline __m128i
2354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2355_mm_cvtsepi64_epi32 (__m128i __A)
2356{
2357 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2358 (__v4si)_mm_undefined_si128(),
2359 (__mmask8) -1);
2360}
2361
9ab4c07a 2362extern __inline void
936c0fe4
AI
2363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2365{
2366 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2367}
2368
2369extern __inline __m128i
2370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2371_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2372{
2373 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2374 (__v4si) __O, __M);
2375}
2376
2377extern __inline __m128i
2378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2379_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2380{
2381 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2382 (__v4si)
2383 _mm_setzero_si128 (),
2384 __M);
2385}
2386
2387extern __inline __m128i
2388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389_mm256_cvtsepi64_epi32 (__m256i __A)
2390{
2391 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2392 (__v4si)_mm_undefined_si128(),
2393 (__mmask8) -1);
2394}
2395
2396extern __inline void
2397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2398_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2399{
2400 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2401}
2402
2403extern __inline __m128i
2404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2405_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2406{
2407 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2408 (__v4si)__O,
2409 __M);
2410}
2411
2412extern __inline __m128i
2413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2414_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2415{
2416 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2417 (__v4si)
2418 _mm_setzero_si128 (),
2419 __M);
2420}
2421
2422extern __inline __m128i
2423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2424_mm_cvtusepi64_epi32 (__m128i __A)
2425{
2426 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2427 (__v4si)_mm_undefined_si128(),
2428 (__mmask8) -1);
2429}
2430
2431extern __inline void
2432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2433_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2434{
2435 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2436}
2437
2438extern __inline __m128i
2439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2440_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2441{
2442 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2443 (__v4si) __O, __M);
2444}
2445
2446extern __inline __m128i
2447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2448_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2449{
2450 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2451 (__v4si)
2452 _mm_setzero_si128 (),
2453 __M);
2454}
2455
2456extern __inline __m128i
2457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2458_mm256_cvtusepi64_epi32 (__m256i __A)
2459{
2460 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2461 (__v4si)_mm_undefined_si128(),
2462 (__mmask8) -1);
2463}
2464
2465extern __inline void
2466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2467_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2468{
2469 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2470}
2471
2472extern __inline __m128i
2473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2474_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2475{
2476 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2477 (__v4si) __O, __M);
2478}
2479
2480extern __inline __m128i
2481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2482_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2483{
2484 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2485 (__v4si)
2486 _mm_setzero_si128 (),
2487 __M);
2488}
2489
2490extern __inline __m256
2491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2493{
2494 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2495 (__v8sf) __O,
2496 __M);
2497}
2498
2499extern __inline __m256
2500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2501_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2502{
2503 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2504 (__v8sf)
2505 _mm256_setzero_ps (),
2506 __M);
2507}
2508
2509extern __inline __m128
2510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2511_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2512{
2513 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2514 (__v4sf) __O,
2515 __M);
2516}
2517
2518extern __inline __m128
2519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2521{
2522 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2523 (__v4sf)
2524 _mm_setzero_ps (),
2525 __M);
2526}
2527
2528extern __inline __m256d
2529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2530_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2531{
2532 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2533 (__v4df) __O,
2534 __M);
2535}
2536
2537extern __inline __m256d
2538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2540{
2541 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2542 (__v4df)
2543 _mm256_setzero_pd (),
2544 __M);
2545}
2546
2547extern __inline __m256i
2548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2549_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2550{
2551 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2552 (__v8si) __O,
2553 __M);
2554}
2555
2556extern __inline __m256i
2557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2558_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2559{
2560 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2561 (__v8si)
2562 _mm256_setzero_si256 (),
2563 __M);
2564}
2565
2566extern __inline __m256i
2567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2568_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2569{
2570 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2571 __M);
2572}
2573
2574extern __inline __m256i
2575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2576_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2577{
2578 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2579 (__v8si)
2580 _mm256_setzero_si256 (),
2581 __M);
2582}
2583
2584extern __inline __m128i
2585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2587{
2588 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2589 (__v4si) __O,
2590 __M);
2591}
2592
2593extern __inline __m128i
2594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2595_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2596{
2597 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2598 (__v4si)
2599 _mm_setzero_si128 (),
2600 __M);
2601}
2602
2603extern __inline __m128i
2604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2606{
2607 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2608 __M);
2609}
2610
2611extern __inline __m128i
2612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2613_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2614{
2615 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2616 (__v4si)
2617 _mm_setzero_si128 (),
2618 __M);
2619}
2620
2621extern __inline __m256i
2622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2623_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2624{
2625 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2626 (__v4di) __O,
2627 __M);
2628}
2629
2630extern __inline __m256i
2631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2632_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2633{
2634 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2635 (__v4di)
2636 _mm256_setzero_si256 (),
2637 __M);
2638}
2639
2640extern __inline __m256i
2641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2643{
936c0fe4
AI
2644 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2645 __M);
936c0fe4
AI
2646}
2647
2648extern __inline __m256i
2649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2651{
936c0fe4
AI
2652 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2653 (__v4di)
2654 _mm256_setzero_si256 (),
2655 __M);
936c0fe4
AI
2656}
2657
2658extern __inline __m128i
2659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2661{
2662 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2663 (__v2di) __O,
2664 __M);
2665}
2666
2667extern __inline __m128i
2668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2670{
2671 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2672 (__v2di)
2673 _mm_setzero_si128 (),
2674 __M);
2675}
2676
2677extern __inline __m128i
2678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2679_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2680{
936c0fe4
AI
2681 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2682 __M);
936c0fe4
AI
2683}
2684
2685extern __inline __m128i
2686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2687_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2688{
936c0fe4
AI
2689 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2690 (__v2di)
2691 _mm_setzero_si128 (),
2692 __M);
936c0fe4
AI
2693}
2694
2695extern __inline __m256
2696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2697_mm256_broadcast_f32x4 (__m128 __A)
2698{
2699 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2700 (__v8sf)_mm256_undefined_pd (),
2701 (__mmask8) -
2702 1);
2703}
2704
2705extern __inline __m256
2706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2708{
2709 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2710 (__v8sf) __O,
2711 __M);
2712}
2713
2714extern __inline __m256
2715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2716_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2717{
2718 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2719 (__v8sf)
2720 _mm256_setzero_ps (),
2721 __M);
2722}
2723
2724extern __inline __m256i
2725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726_mm256_broadcast_i32x4 (__m128i __A)
2727{
2728 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2729 __A,
2730 (__v8si)_mm256_undefined_si256 (),
2731 (__mmask8) -
2732 1);
2733}
2734
2735extern __inline __m256i
2736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2737_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2738{
2739 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2740 __A,
2741 (__v8si)
2742 __O, __M);
2743}
2744
2745extern __inline __m256i
2746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2748{
2749 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2750 __A,
2751 (__v8si)
2752 _mm256_setzero_si256 (),
2753 __M);
2754}
2755
2756extern __inline __m256i
2757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2759{
2760 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2761 (__v8si) __W,
2762 (__mmask8) __U);
2763}
2764
2765extern __inline __m256i
2766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2767_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2768{
2769 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2770 (__v8si)
2771 _mm256_setzero_si256 (),
2772 (__mmask8) __U);
2773}
2774
2775extern __inline __m128i
2776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2778{
2779 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2780 (__v4si) __W,
2781 (__mmask8) __U);
2782}
2783
2784extern __inline __m128i
2785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2787{
2788 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2789 (__v4si)
2790 _mm_setzero_si128 (),
2791 (__mmask8) __U);
2792}
2793
2794extern __inline __m256i
2795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2796_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2797{
2798 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2799 (__v4di) __W,
2800 (__mmask8) __U);
2801}
2802
2803extern __inline __m256i
2804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2805_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2806{
2807 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2808 (__v4di)
2809 _mm256_setzero_si256 (),
2810 (__mmask8) __U);
2811}
2812
2813extern __inline __m128i
2814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2815_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2816{
2817 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2818 (__v2di) __W,
2819 (__mmask8) __U);
2820}
2821
2822extern __inline __m128i
2823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2824_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2825{
2826 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2827 (__v2di)
2828 _mm_setzero_si128 (),
2829 (__mmask8) __U);
2830}
2831
2832extern __inline __m256i
2833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2834_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2835{
2836 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2837 (__v8si) __W,
2838 (__mmask8) __U);
2839}
2840
2841extern __inline __m256i
2842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2843_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2844{
2845 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2846 (__v8si)
2847 _mm256_setzero_si256 (),
2848 (__mmask8) __U);
2849}
2850
2851extern __inline __m128i
2852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2853_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2854{
2855 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2856 (__v4si) __W,
2857 (__mmask8) __U);
2858}
2859
2860extern __inline __m128i
2861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2862_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2863{
2864 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2865 (__v4si)
2866 _mm_setzero_si128 (),
2867 (__mmask8) __U);
2868}
2869
2870extern __inline __m256i
2871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2872_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2873{
2874 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2875 (__v4di) __W,
2876 (__mmask8) __U);
2877}
2878
2879extern __inline __m256i
2880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2881_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2882{
2883 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2884 (__v4di)
2885 _mm256_setzero_si256 (),
2886 (__mmask8) __U);
2887}
2888
2889extern __inline __m128i
2890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2891_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2892{
2893 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2894 (__v2di) __W,
2895 (__mmask8) __U);
2896}
2897
2898extern __inline __m128i
2899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2900_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2901{
2902 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2903 (__v2di)
2904 _mm_setzero_si128 (),
2905 (__mmask8) __U);
2906}
2907
2908extern __inline __m256i
2909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2910_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2911{
2912 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2913 (__v4di) __W,
2914 (__mmask8) __U);
2915}
2916
2917extern __inline __m256i
2918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2919_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2920{
2921 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2922 (__v4di)
2923 _mm256_setzero_si256 (),
2924 (__mmask8) __U);
2925}
2926
2927extern __inline __m128i
2928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2929_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2930{
2931 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2932 (__v2di) __W,
2933 (__mmask8) __U);
2934}
2935
2936extern __inline __m128i
2937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2939{
2940 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2941 (__v2di)
2942 _mm_setzero_si128 (),
2943 (__mmask8) __U);
2944}
2945
2946extern __inline __m256i
2947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2948_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2949{
2950 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2951 (__v8si) __W,
2952 (__mmask8) __U);
2953}
2954
2955extern __inline __m256i
2956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2957_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2958{
2959 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2960 (__v8si)
2961 _mm256_setzero_si256 (),
2962 (__mmask8) __U);
2963}
2964
2965extern __inline __m128i
2966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2967_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2968{
2969 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2970 (__v4si) __W,
2971 (__mmask8) __U);
2972}
2973
2974extern __inline __m128i
2975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2976_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2977{
2978 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2979 (__v4si)
2980 _mm_setzero_si128 (),
2981 (__mmask8) __U);
2982}
2983
2984extern __inline __m256i
2985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2986_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2987{
2988 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2989 (__v4di) __W,
2990 (__mmask8) __U);
2991}
2992
2993extern __inline __m256i
2994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2995_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2996{
2997 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2998 (__v4di)
2999 _mm256_setzero_si256 (),
3000 (__mmask8) __U);
3001}
3002
3003extern __inline __m128i
3004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3005_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3006{
3007 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3008 (__v2di) __W,
3009 (__mmask8) __U);
3010}
3011
3012extern __inline __m128i
3013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3014_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3015{
3016 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3017 (__v2di)
3018 _mm_setzero_si128 (),
3019 (__mmask8) __U);
3020}
3021
3022extern __inline __m256i
3023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3025{
3026 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3027 (__v8si) __W,
3028 (__mmask8) __U);
3029}
3030
3031extern __inline __m256i
3032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3033_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3034{
3035 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3036 (__v8si)
3037 _mm256_setzero_si256 (),
3038 (__mmask8) __U);
3039}
3040
3041extern __inline __m128i
3042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3043_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3044{
3045 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3046 (__v4si) __W,
3047 (__mmask8) __U);
3048}
3049
3050extern __inline __m128i
3051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3052_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3053{
3054 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3055 (__v4si)
3056 _mm_setzero_si128 (),
3057 (__mmask8) __U);
3058}
3059
3060extern __inline __m256i
3061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3062_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3063{
3064 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3065 (__v4di) __W,
3066 (__mmask8) __U);
3067}
3068
3069extern __inline __m256i
3070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3071_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3072{
3073 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3074 (__v4di)
3075 _mm256_setzero_si256 (),
3076 (__mmask8) __U);
3077}
3078
3079extern __inline __m128i
3080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3081_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3082{
3083 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3084 (__v2di) __W,
3085 (__mmask8) __U);
3086}
3087
3088extern __inline __m128i
3089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3090_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3091{
3092 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3093 (__v2di)
3094 _mm_setzero_si128 (),
3095 (__mmask8) __U);
3096}
3097
3098extern __inline __m256i
3099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3100_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3101{
3102 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3103 (__v4di) __W,
3104 (__mmask8) __U);
3105}
3106
3107extern __inline __m256i
3108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3109_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3110{
3111 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3112 (__v4di)
3113 _mm256_setzero_si256 (),
3114 (__mmask8) __U);
3115}
3116
3117extern __inline __m128i
3118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3119_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3120{
3121 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3122 (__v2di) __W,
3123 (__mmask8) __U);
3124}
3125
3126extern __inline __m128i
3127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3128_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3129{
3130 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3131 (__v2di)
3132 _mm_setzero_si128 (),
3133 (__mmask8) __U);
3134}
3135
3136extern __inline __m256d
3137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3138_mm256_rcp14_pd (__m256d __A)
3139{
3140 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3141 (__v4df)
3142 _mm256_setzero_pd (),
3143 (__mmask8) -1);
3144}
3145
3146extern __inline __m256d
3147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3149{
3150 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3151 (__v4df) __W,
3152 (__mmask8) __U);
3153}
3154
3155extern __inline __m256d
3156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3157_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3158{
3159 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3160 (__v4df)
3161 _mm256_setzero_pd (),
3162 (__mmask8) __U);
3163}
3164
3165extern __inline __m128d
3166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3167_mm_rcp14_pd (__m128d __A)
3168{
3169 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3170 (__v2df)
3171 _mm_setzero_pd (),
3172 (__mmask8) -1);
3173}
3174
3175extern __inline __m128d
3176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3178{
3179 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3180 (__v2df) __W,
3181 (__mmask8) __U);
3182}
3183
3184extern __inline __m128d
3185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3186_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3187{
3188 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3189 (__v2df)
3190 _mm_setzero_pd (),
3191 (__mmask8) __U);
3192}
3193
3194extern __inline __m256
3195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196_mm256_rcp14_ps (__m256 __A)
3197{
3198 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3199 (__v8sf)
3200 _mm256_setzero_ps (),
3201 (__mmask8) -1);
3202}
3203
3204extern __inline __m256
3205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3206_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3207{
3208 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3209 (__v8sf) __W,
3210 (__mmask8) __U);
3211}
3212
3213extern __inline __m256
3214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3215_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3216{
3217 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3218 (__v8sf)
3219 _mm256_setzero_ps (),
3220 (__mmask8) __U);
3221}
3222
3223extern __inline __m128
3224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3225_mm_rcp14_ps (__m128 __A)
3226{
3227 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3228 (__v4sf)
3229 _mm_setzero_ps (),
3230 (__mmask8) -1);
3231}
3232
3233extern __inline __m128
3234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3236{
3237 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3238 (__v4sf) __W,
3239 (__mmask8) __U);
3240}
3241
3242extern __inline __m128
3243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3244_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3245{
3246 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3247 (__v4sf)
3248 _mm_setzero_ps (),
3249 (__mmask8) __U);
3250}
3251
3252extern __inline __m256d
3253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3254_mm256_rsqrt14_pd (__m256d __A)
3255{
3256 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3257 (__v4df)
3258 _mm256_setzero_pd (),
3259 (__mmask8) -1);
3260}
3261
3262extern __inline __m256d
3263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3264_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3265{
3266 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3267 (__v4df) __W,
3268 (__mmask8) __U);
3269}
3270
3271extern __inline __m256d
3272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3273_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3274{
3275 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3276 (__v4df)
3277 _mm256_setzero_pd (),
3278 (__mmask8) __U);
3279}
3280
3281extern __inline __m128d
3282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3283_mm_rsqrt14_pd (__m128d __A)
3284{
3285 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3286 (__v2df)
3287 _mm_setzero_pd (),
3288 (__mmask8) -1);
3289}
3290
3291extern __inline __m128d
3292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3293_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3294{
3295 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3296 (__v2df) __W,
3297 (__mmask8) __U);
3298}
3299
3300extern __inline __m128d
3301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3302_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3303{
3304 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3305 (__v2df)
3306 _mm_setzero_pd (),
3307 (__mmask8) __U);
3308}
3309
3310extern __inline __m256
3311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3312_mm256_rsqrt14_ps (__m256 __A)
3313{
3314 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3315 (__v8sf)
3316 _mm256_setzero_ps (),
3317 (__mmask8) -1);
3318}
3319
3320extern __inline __m256
3321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3322_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3323{
3324 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3325 (__v8sf) __W,
3326 (__mmask8) __U);
3327}
3328
3329extern __inline __m256
3330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3332{
3333 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3334 (__v8sf)
3335 _mm256_setzero_ps (),
3336 (__mmask8) __U);
3337}
3338
3339extern __inline __m128
3340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3341_mm_rsqrt14_ps (__m128 __A)
3342{
3343 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3344 (__v4sf)
3345 _mm_setzero_ps (),
3346 (__mmask8) -1);
3347}
3348
3349extern __inline __m128
3350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3351_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3352{
3353 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3354 (__v4sf) __W,
3355 (__mmask8) __U);
3356}
3357
3358extern __inline __m128
3359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3360_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3361{
3362 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3363 (__v4sf)
3364 _mm_setzero_ps (),
3365 (__mmask8) __U);
3366}
3367
3368extern __inline __m256d
3369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3370_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3371{
3372 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3373 (__v4df) __W,
3374 (__mmask8) __U);
3375}
3376
3377extern __inline __m256d
3378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3379_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3380{
3381 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3382 (__v4df)
3383 _mm256_setzero_pd (),
3384 (__mmask8) __U);
3385}
3386
3387extern __inline __m128d
3388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3389_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3390{
3391 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3392 (__v2df) __W,
3393 (__mmask8) __U);
3394}
3395
3396extern __inline __m128d
3397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3398_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3399{
3400 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3401 (__v2df)
3402 _mm_setzero_pd (),
3403 (__mmask8) __U);
3404}
3405
3406extern __inline __m256
3407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3408_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3409{
3410 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3411 (__v8sf) __W,
3412 (__mmask8) __U);
3413}
3414
3415extern __inline __m256
3416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3418{
3419 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3420 (__v8sf)
3421 _mm256_setzero_ps (),
3422 (__mmask8) __U);
3423}
3424
3425extern __inline __m128
3426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3427_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3428{
3429 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3430 (__v4sf) __W,
3431 (__mmask8) __U);
3432}
3433
3434extern __inline __m128
3435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3436_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3437{
3438 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3439 (__v4sf)
3440 _mm_setzero_ps (),
3441 (__mmask8) __U);
3442}
3443
3444extern __inline __m256i
3445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3446_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3447 __m256i __B)
3448{
3449 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3450 (__v8si) __B,
3451 (__v8si) __W,
3452 (__mmask8) __U);
3453}
3454
3455extern __inline __m256i
3456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3458{
3459 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3460 (__v8si) __B,
3461 (__v8si)
3462 _mm256_setzero_si256 (),
3463 (__mmask8) __U);
3464}
3465
3466extern __inline __m256i
3467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3469 __m256i __B)
3470{
3471 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3472 (__v4di) __B,
3473 (__v4di) __W,
3474 (__mmask8) __U);
3475}
3476
3477extern __inline __m256i
3478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3480{
3481 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3482 (__v4di) __B,
3483 (__v4di)
3484 _mm256_setzero_si256 (),
3485 (__mmask8) __U);
3486}
3487
3488extern __inline __m256i
3489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3491 __m256i __B)
3492{
3493 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3494 (__v8si) __B,
3495 (__v8si) __W,
3496 (__mmask8) __U);
3497}
3498
3499extern __inline __m256i
3500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3502{
3503 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3504 (__v8si) __B,
3505 (__v8si)
3506 _mm256_setzero_si256 (),
3507 (__mmask8) __U);
3508}
3509
3510extern __inline __m256i
3511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3513 __m256i __B)
3514{
3515 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3516 (__v4di) __B,
3517 (__v4di) __W,
3518 (__mmask8) __U);
3519}
3520
3521extern __inline __m256i
3522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3524{
3525 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3526 (__v4di) __B,
3527 (__v4di)
3528 _mm256_setzero_si256 (),
3529 (__mmask8) __U);
3530}
3531
3532extern __inline __m128i
3533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3535 __m128i __B)
3536{
3537 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3538 (__v4si) __B,
3539 (__v4si) __W,
3540 (__mmask8) __U);
3541}
3542
3543extern __inline __m128i
3544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3546{
3547 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3548 (__v4si) __B,
3549 (__v4si)
3550 _mm_setzero_si128 (),
3551 (__mmask8) __U);
3552}
3553
3554extern __inline __m128i
3555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3557 __m128i __B)
3558{
3559 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3560 (__v2di) __B,
3561 (__v2di) __W,
3562 (__mmask8) __U);
3563}
3564
3565extern __inline __m128i
3566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3568{
3569 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3570 (__v2di) __B,
3571 (__v2di)
3572 _mm_setzero_si128 (),
3573 (__mmask8) __U);
3574}
3575
3576extern __inline __m128i
3577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3579 __m128i __B)
3580{
3581 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3582 (__v4si) __B,
3583 (__v4si) __W,
3584 (__mmask8) __U);
3585}
3586
3587extern __inline __m128i
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3590{
3591 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3592 (__v4si) __B,
3593 (__v4si)
3594 _mm_setzero_si128 (),
3595 (__mmask8) __U);
3596}
3597
3598extern __inline __m128i
3599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3601 __m128i __B)
3602{
3603 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3604 (__v2di) __B,
3605 (__v2di) __W,
3606 (__mmask8) __U);
3607}
3608
3609extern __inline __m128i
3610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3612{
3613 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3614 (__v2di) __B,
3615 (__v2di)
3616 _mm_setzero_si128 (),
3617 (__mmask8) __U);
3618}
3619
3620extern __inline __m256
3621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622_mm256_getexp_ps (__m256 __A)
3623{
3624 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3625 (__v8sf)
3626 _mm256_setzero_ps (),
3627 (__mmask8) -1);
3628}
3629
3630extern __inline __m256
3631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3633{
3634 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3635 (__v8sf) __W,
3636 (__mmask8) __U);
3637}
3638
3639extern __inline __m256
3640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3642{
3643 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3644 (__v8sf)
3645 _mm256_setzero_ps (),
3646 (__mmask8) __U);
3647}
3648
3649extern __inline __m256d
3650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651_mm256_getexp_pd (__m256d __A)
3652{
3653 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3654 (__v4df)
3655 _mm256_setzero_pd (),
3656 (__mmask8) -1);
3657}
3658
3659extern __inline __m256d
3660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3662{
3663 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3664 (__v4df) __W,
3665 (__mmask8) __U);
3666}
3667
3668extern __inline __m256d
3669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3670_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3671{
3672 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3673 (__v4df)
3674 _mm256_setzero_pd (),
3675 (__mmask8) __U);
3676}
3677
3678extern __inline __m128
3679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680_mm_getexp_ps (__m128 __A)
3681{
3682 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3683 (__v4sf)
3684 _mm_setzero_ps (),
3685 (__mmask8) -1);
3686}
3687
3688extern __inline __m128
3689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3691{
3692 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3693 (__v4sf) __W,
3694 (__mmask8) __U);
3695}
3696
3697extern __inline __m128
3698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3699_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3700{
3701 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3702 (__v4sf)
3703 _mm_setzero_ps (),
3704 (__mmask8) __U);
3705}
3706
3707extern __inline __m128d
3708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709_mm_getexp_pd (__m128d __A)
3710{
3711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3712 (__v2df)
3713 _mm_setzero_pd (),
3714 (__mmask8) -1);
3715}
3716
3717extern __inline __m128d
3718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3720{
3721 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3722 (__v2df) __W,
3723 (__mmask8) __U);
3724}
3725
3726extern __inline __m128d
3727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3729{
3730 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3731 (__v2df)
3732 _mm_setzero_pd (),
3733 (__mmask8) __U);
3734}
3735
3736extern __inline __m256i
3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3739 __m128i __B)
3740{
3741 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3742 (__v4si) __B,
3743 (__v8si) __W,
3744 (__mmask8) __U);
3745}
3746
3747extern __inline __m256i
3748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3749_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3750{
3751 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3752 (__v4si) __B,
3753 (__v8si)
3754 _mm256_setzero_si256 (),
3755 (__mmask8) __U);
3756}
3757
3758extern __inline __m128i
3759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3760_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3761 __m128i __B)
3762{
3763 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3764 (__v4si) __B,
3765 (__v4si) __W,
3766 (__mmask8) __U);
3767}
3768
3769extern __inline __m128i
3770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3772{
3773 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3774 (__v4si) __B,
3775 (__v4si)
3776 _mm_setzero_si128 (),
3777 (__mmask8) __U);
3778}
3779
3780extern __inline __m256i
3781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3783 __m128i __B)
3784{
3785 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3786 (__v2di) __B,
3787 (__v4di) __W,
3788 (__mmask8) __U);
3789}
3790
3791extern __inline __m256i
3792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3794{
3795 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3796 (__v2di) __B,
3797 (__v4di)
3798 _mm256_setzero_si256 (),
3799 (__mmask8) __U);
3800}
3801
3802extern __inline __m128i
3803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3805 __m128i __B)
3806{
3807 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3808 (__v2di) __B,
3809 (__v2di) __W,
3810 (__mmask8) __U);
3811}
3812
3813extern __inline __m128i
3814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3816{
3817 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3818 (__v2di) __B,
3819 (__v2di)
3820 _mm_setzero_di (),
3821 (__mmask8) __U);
3822}
3823
3824extern __inline __m256i
3825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3826_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3827 __m256i __B)
3828{
3829 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3830 (__v8si) __B,
3831 (__v8si) __W,
3832 (__mmask8) __U);
3833}
3834
3835extern __inline __m256i
3836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3838{
3839 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3840 (__v8si) __B,
3841 (__v8si)
3842 _mm256_setzero_si256 (),
3843 (__mmask8) __U);
3844}
3845
3846extern __inline __m256d
3847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848_mm256_scalef_pd (__m256d __A, __m256d __B)
3849{
3850 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3851 (__v4df) __B,
3852 (__v4df)
3853 _mm256_setzero_pd (),
3854 (__mmask8) -1);
3855}
3856
3857extern __inline __m256d
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3860 __m256d __B)
3861{
3862 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3863 (__v4df) __B,
3864 (__v4df) __W,
3865 (__mmask8) __U);
3866}
3867
3868extern __inline __m256d
3869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3871{
3872 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3873 (__v4df) __B,
3874 (__v4df)
3875 _mm256_setzero_pd (),
3876 (__mmask8) __U);
3877}
3878
3879extern __inline __m256
3880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881_mm256_scalef_ps (__m256 __A, __m256 __B)
3882{
3883 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3884 (__v8sf) __B,
3885 (__v8sf)
3886 _mm256_setzero_ps (),
3887 (__mmask8) -1);
3888}
3889
3890extern __inline __m256
3891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3893 __m256 __B)
3894{
3895 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3896 (__v8sf) __B,
3897 (__v8sf) __W,
3898 (__mmask8) __U);
3899}
3900
3901extern __inline __m256
3902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3904{
3905 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3906 (__v8sf) __B,
3907 (__v8sf)
3908 _mm256_setzero_ps (),
3909 (__mmask8) __U);
3910}
3911
3912extern __inline __m128d
3913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3914_mm_scalef_pd (__m128d __A, __m128d __B)
3915{
3916 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3917 (__v2df) __B,
3918 (__v2df)
3919 _mm_setzero_pd (),
3920 (__mmask8) -1);
3921}
3922
3923extern __inline __m128d
3924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3926 __m128d __B)
3927{
3928 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3929 (__v2df) __B,
3930 (__v2df) __W,
3931 (__mmask8) __U);
3932}
3933
3934extern __inline __m128d
3935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3937{
3938 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3939 (__v2df) __B,
3940 (__v2df)
3941 _mm_setzero_pd (),
3942 (__mmask8) __U);
3943}
3944
3945extern __inline __m128
3946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3947_mm_scalef_ps (__m128 __A, __m128 __B)
3948{
3949 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3950 (__v4sf) __B,
3951 (__v4sf)
3952 _mm_setzero_ps (),
3953 (__mmask8) -1);
3954}
3955
3956extern __inline __m128
3957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3958_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3959{
3960 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3961 (__v4sf) __B,
3962 (__v4sf) __W,
3963 (__mmask8) __U);
3964}
3965
3966extern __inline __m128
3967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3968_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3969{
3970 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3971 (__v4sf) __B,
3972 (__v4sf)
3973 _mm_setzero_ps (),
3974 (__mmask8) __U);
3975}
3976
3977extern __inline __m256d
3978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3980 __m256d __C)
3981{
3982 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3983 (__v4df) __B,
3984 (__v4df) __C,
3985 (__mmask8) __U);
3986}
3987
3988extern __inline __m256d
3989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
3991 __mmask8 __U)
3992{
3993 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
3994 (__v4df) __B,
3995 (__v4df) __C,
3996 (__mmask8) __U);
3997}
3998
3999extern __inline __m256d
4000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4002 __m256d __C)
4003{
4004 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4005 (__v4df) __B,
4006 (__v4df) __C,
4007 (__mmask8) __U);
4008}
4009
4010extern __inline __m128d
4011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4013{
4014 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4015 (__v2df) __B,
4016 (__v2df) __C,
4017 (__mmask8) __U);
4018}
4019
4020extern __inline __m128d
4021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4022_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4023 __mmask8 __U)
4024{
4025 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4026 (__v2df) __B,
4027 (__v2df) __C,
4028 (__mmask8) __U);
4029}
4030
4031extern __inline __m128d
4032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4034 __m128d __C)
4035{
4036 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4037 (__v2df) __B,
4038 (__v2df) __C,
4039 (__mmask8) __U);
4040}
4041
4042extern __inline __m256
4043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4044_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4045{
4046 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4047 (__v8sf) __B,
4048 (__v8sf) __C,
4049 (__mmask8) __U);
4050}
4051
4052extern __inline __m256
4053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4054_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4055 __mmask8 __U)
4056{
4057 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4058 (__v8sf) __B,
4059 (__v8sf) __C,
4060 (__mmask8) __U);
4061}
4062
4063extern __inline __m256
4064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4066 __m256 __C)
4067{
4068 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4069 (__v8sf) __B,
4070 (__v8sf) __C,
4071 (__mmask8) __U);
4072}
4073
4074extern __inline __m128
4075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4077{
4078 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4079 (__v4sf) __B,
4080 (__v4sf) __C,
4081 (__mmask8) __U);
4082}
4083
4084extern __inline __m128
4085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4086_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4087{
4088 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4089 (__v4sf) __B,
4090 (__v4sf) __C,
4091 (__mmask8) __U);
4092}
4093
4094extern __inline __m128
4095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4097{
4098 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4099 (__v4sf) __B,
4100 (__v4sf) __C,
4101 (__mmask8) __U);
4102}
4103
4104extern __inline __m256d
4105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4106_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4107 __m256d __C)
4108{
4109 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4110 (__v4df) __B,
4111 -(__v4df) __C,
4112 (__mmask8) __U);
4113}
4114
4115extern __inline __m256d
4116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4117_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4118 __mmask8 __U)
4119{
4120 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4121 (__v4df) __B,
4122 (__v4df) __C,
4123 (__mmask8) __U);
4124}
4125
4126extern __inline __m256d
4127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4128_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4129 __m256d __C)
4130{
4131 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4132 (__v4df) __B,
4133 -(__v4df) __C,
4134 (__mmask8) __U);
4135}
4136
4137extern __inline __m128d
4138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4140{
4141 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4142 (__v2df) __B,
4143 -(__v2df) __C,
4144 (__mmask8) __U);
4145}
4146
4147extern __inline __m128d
4148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4149_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4150 __mmask8 __U)
4151{
4152 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4153 (__v2df) __B,
4154 (__v2df) __C,
4155 (__mmask8) __U);
4156}
4157
4158extern __inline __m128d
4159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4160_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4161 __m128d __C)
4162{
4163 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4164 (__v2df) __B,
4165 -(__v2df) __C,
4166 (__mmask8) __U);
4167}
4168
4169extern __inline __m256
4170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4172{
4173 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4174 (__v8sf) __B,
4175 -(__v8sf) __C,
4176 (__mmask8) __U);
4177}
4178
4179extern __inline __m256
4180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4181_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4182 __mmask8 __U)
4183{
4184 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4185 (__v8sf) __B,
4186 (__v8sf) __C,
4187 (__mmask8) __U);
4188}
4189
4190extern __inline __m256
4191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4193 __m256 __C)
4194{
4195 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4196 (__v8sf) __B,
4197 -(__v8sf) __C,
4198 (__mmask8) __U);
4199}
4200
4201extern __inline __m128
4202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4204{
4205 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4206 (__v4sf) __B,
4207 -(__v4sf) __C,
4208 (__mmask8) __U);
4209}
4210
4211extern __inline __m128
4212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4213_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4214{
4215 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4216 (__v4sf) __B,
4217 (__v4sf) __C,
4218 (__mmask8) __U);
4219}
4220
4221extern __inline __m128
4222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4223_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4224{
4225 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4226 (__v4sf) __B,
4227 -(__v4sf) __C,
4228 (__mmask8) __U);
4229}
4230
4231extern __inline __m256d
4232__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4233_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4234 __m256d __C)
4235{
4236 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4237 (__v4df) __B,
4238 (__v4df) __C,
4239 (__mmask8) __U);
4240}
4241
4242extern __inline __m256d
4243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4245 __mmask8 __U)
4246{
4247 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4248 (__v4df) __B,
4249 (__v4df) __C,
4250 (__mmask8)
4251 __U);
4252}
4253
4254extern __inline __m256d
4255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4256_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4257 __m256d __C)
4258{
4259 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4260 (__v4df) __B,
4261 (__v4df) __C,
4262 (__mmask8)
4263 __U);
4264}
4265
4266extern __inline __m128d
4267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4268_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4269 __m128d __C)
4270{
4271 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4272 (__v2df) __B,
4273 (__v2df) __C,
4274 (__mmask8) __U);
4275}
4276
4277extern __inline __m128d
4278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4280 __mmask8 __U)
4281{
4282 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4283 (__v2df) __B,
4284 (__v2df) __C,
4285 (__mmask8)
4286 __U);
4287}
4288
4289extern __inline __m128d
4290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4291_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4292 __m128d __C)
4293{
4294 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4295 (__v2df) __B,
4296 (__v2df) __C,
4297 (__mmask8)
4298 __U);
4299}
4300
4301extern __inline __m256
4302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4303_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4304 __m256 __C)
4305{
4306 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4307 (__v8sf) __B,
4308 (__v8sf) __C,
4309 (__mmask8) __U);
4310}
4311
4312extern __inline __m256
4313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4315 __mmask8 __U)
4316{
4317 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4318 (__v8sf) __B,
4319 (__v8sf) __C,
4320 (__mmask8) __U);
4321}
4322
4323extern __inline __m256
4324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4326 __m256 __C)
4327{
4328 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4329 (__v8sf) __B,
4330 (__v8sf) __C,
4331 (__mmask8) __U);
4332}
4333
4334extern __inline __m128
4335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4337{
4338 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4339 (__v4sf) __B,
4340 (__v4sf) __C,
4341 (__mmask8) __U);
4342}
4343
4344extern __inline __m128
4345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4346_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4347 __mmask8 __U)
4348{
4349 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4350 (__v4sf) __B,
4351 (__v4sf) __C,
4352 (__mmask8) __U);
4353}
4354
4355extern __inline __m128
4356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4357_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4358 __m128 __C)
4359{
4360 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4361 (__v4sf) __B,
4362 (__v4sf) __C,
4363 (__mmask8) __U);
4364}
4365
4366extern __inline __m256d
4367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4369 __m256d __C)
4370{
4371 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4372 (__v4df) __B,
4373 -(__v4df) __C,
4374 (__mmask8) __U);
4375}
4376
4377extern __inline __m256d
4378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4380 __mmask8 __U)
4381{
4382 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4383 (__v4df) __B,
4384 (__v4df) __C,
4385 (__mmask8)
4386 __U);
4387}
4388
4389extern __inline __m256d
4390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4391_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4392 __m256d __C)
4393{
4394 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4395 (__v4df) __B,
4396 -(__v4df) __C,
4397 (__mmask8)
4398 __U);
4399}
4400
4401extern __inline __m128d
4402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4403_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4404 __m128d __C)
4405{
4406 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4407 (__v2df) __B,
4408 -(__v2df) __C,
4409 (__mmask8) __U);
4410}
4411
4412extern __inline __m128d
4413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4415 __mmask8 __U)
4416{
4417 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4418 (__v2df) __B,
4419 (__v2df) __C,
4420 (__mmask8)
4421 __U);
4422}
4423
4424extern __inline __m128d
4425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4426_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4427 __m128d __C)
4428{
4429 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4430 (__v2df) __B,
4431 -(__v2df) __C,
4432 (__mmask8)
4433 __U);
4434}
4435
4436extern __inline __m256
4437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4438_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4439 __m256 __C)
4440{
4441 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4442 (__v8sf) __B,
4443 -(__v8sf) __C,
4444 (__mmask8) __U);
4445}
4446
4447extern __inline __m256
4448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4450 __mmask8 __U)
4451{
4452 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4453 (__v8sf) __B,
4454 (__v8sf) __C,
4455 (__mmask8) __U);
4456}
4457
4458extern __inline __m256
4459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4461 __m256 __C)
4462{
4463 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4464 (__v8sf) __B,
4465 -(__v8sf) __C,
4466 (__mmask8) __U);
4467}
4468
4469extern __inline __m128
4470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4472{
4473 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4474 (__v4sf) __B,
4475 -(__v4sf) __C,
4476 (__mmask8) __U);
4477}
4478
4479extern __inline __m128
4480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4481_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4482 __mmask8 __U)
4483{
4484 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4485 (__v4sf) __B,
4486 (__v4sf) __C,
4487 (__mmask8) __U);
4488}
4489
4490extern __inline __m128
4491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4493 __m128 __C)
4494{
4495 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4496 (__v4sf) __B,
4497 -(__v4sf) __C,
4498 (__mmask8) __U);
4499}
4500
4501extern __inline __m256d
4502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4504 __m256d __C)
4505{
4506 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4507 (__v4df) __B,
4508 (__v4df) __C,
4509 (__mmask8) __U);
4510}
4511
4512extern __inline __m256d
4513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4515 __mmask8 __U)
4516{
4517 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
4518 (__v4df) __B,
4519 (__v4df) __C,
4520 (__mmask8) __U);
4521}
4522
4523extern __inline __m256d
4524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4526 __m256d __C)
4527{
4528 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4529 (__v4df) __B,
4530 (__v4df) __C,
4531 (__mmask8) __U);
4532}
4533
4534extern __inline __m128d
4535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4537 __m128d __C)
4538{
4539 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4540 (__v2df) __B,
4541 (__v2df) __C,
4542 (__mmask8) __U);
4543}
4544
4545extern __inline __m128d
4546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4548 __mmask8 __U)
4549{
4550 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
4551 (__v2df) __B,
4552 (__v2df) __C,
4553 (__mmask8) __U);
4554}
4555
4556extern __inline __m128d
4557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4558_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4559 __m128d __C)
4560{
4561 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4562 (__v2df) __B,
4563 (__v2df) __C,
4564 (__mmask8) __U);
4565}
4566
4567extern __inline __m256
4568__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4570 __m256 __C)
4571{
4572 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4573 (__v8sf) __B,
4574 (__v8sf) __C,
4575 (__mmask8) __U);
4576}
4577
4578extern __inline __m256
4579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4581 __mmask8 __U)
4582{
4583 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
4584 (__v8sf) __B,
4585 (__v8sf) __C,
4586 (__mmask8) __U);
4587}
4588
4589extern __inline __m256
4590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4592 __m256 __C)
4593{
4594 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4595 (__v8sf) __B,
4596 (__v8sf) __C,
4597 (__mmask8) __U);
4598}
4599
4600extern __inline __m128
4601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4602_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4603{
4604 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4605 (__v4sf) __B,
4606 (__v4sf) __C,
4607 (__mmask8) __U);
4608}
4609
4610extern __inline __m128
4611__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4612_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4613{
4614 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
4615 (__v4sf) __B,
4616 (__v4sf) __C,
4617 (__mmask8) __U);
4618}
4619
4620extern __inline __m128
4621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4622_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4623{
4624 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4625 (__v4sf) __B,
4626 (__v4sf) __C,
4627 (__mmask8) __U);
4628}
4629
4630extern __inline __m256d
4631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4632_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4633 __m256d __C)
4634{
4635 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4636 (__v4df) __B,
4637 (__v4df) __C,
4638 (__mmask8) __U);
4639}
4640
4641extern __inline __m256d
4642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4644 __mmask8 __U)
4645{
4646 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4647 (__v4df) __B,
4648 (__v4df) __C,
4649 (__mmask8) __U);
4650}
4651
4652extern __inline __m256d
4653__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4655 __m256d __C)
4656{
4657 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4658 (__v4df) __B,
4659 -(__v4df) __C,
4660 (__mmask8) __U);
4661}
4662
4663extern __inline __m128d
4664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4666 __m128d __C)
4667{
4668 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4669 (__v2df) __B,
4670 (__v2df) __C,
4671 (__mmask8) __U);
4672}
4673
4674extern __inline __m128d
4675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4677 __mmask8 __U)
4678{
4679 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4680 (__v2df) __B,
4681 (__v2df) __C,
4682 (__mmask8) __U);
4683}
4684
4685extern __inline __m128d
4686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4688 __m128d __C)
4689{
4690 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4691 (__v2df) __B,
4692 -(__v2df) __C,
4693 (__mmask8) __U);
4694}
4695
4696extern __inline __m256
4697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4699 __m256 __C)
4700{
4701 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4702 (__v8sf) __B,
4703 (__v8sf) __C,
4704 (__mmask8) __U);
4705}
4706
4707extern __inline __m256
4708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4710 __mmask8 __U)
4711{
4712 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4713 (__v8sf) __B,
4714 (__v8sf) __C,
4715 (__mmask8) __U);
4716}
4717
4718extern __inline __m256
4719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4721 __m256 __C)
4722{
4723 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4724 (__v8sf) __B,
4725 -(__v8sf) __C,
4726 (__mmask8) __U);
4727}
4728
4729extern __inline __m128
4730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4731_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4732{
4733 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4734 (__v4sf) __B,
4735 (__v4sf) __C,
4736 (__mmask8) __U);
4737}
4738
4739extern __inline __m128
4740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4741_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4742{
4743 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4744 (__v4sf) __B,
4745 (__v4sf) __C,
4746 (__mmask8) __U);
4747}
4748
4749extern __inline __m128
4750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4751_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4752{
4753 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4754 (__v4sf) __B,
4755 -(__v4sf) __C,
4756 (__mmask8) __U);
4757}
4758
4759extern __inline __m128i
4760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4761_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4762 __m128i __B)
4763{
4764 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4765 (__v4si) __B,
4766 (__v4si) __W,
4767 (__mmask8) __U);
4768}
4769
4770extern __inline __m128i
4771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4773{
4774 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4775 (__v4si) __B,
4776 (__v4si)
4777 _mm_setzero_si128 (),
4778 (__mmask8) __U);
4779}
4780
4781extern __inline __m256i
4782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4784 __m256i __B)
4785{
4786 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4787 (__v8si) __B,
4788 (__v8si) __W,
4789 (__mmask8) __U);
4790}
4791
4792extern __inline __m256i
4793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4795{
4796 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4797 (__v8si) __B,
4798 (__v8si)
4799 _mm256_setzero_si256 (),
4800 (__mmask8) __U);
4801}
4802
4803extern __inline __m128i
4804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4806 __m128i __B)
4807{
4808 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4809 (__v4si) __B,
4810 (__v4si) __W,
4811 (__mmask8) __U);
4812}
4813
4814extern __inline __m128i
4815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4817{
4818 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4819 (__v4si) __B,
4820 (__v4si)
4821 _mm_setzero_si128 (),
4822 (__mmask8) __U);
4823}
4824
4825extern __inline __m256i
4826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4828 __m256i __B)
4829{
4830 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4831 (__v8si) __B,
4832 (__v8si) __W,
4833 (__mmask8) __U);
4834}
4835
4836extern __inline __m256i
4837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4839{
4840 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4841 (__v8si) __B,
4842 (__v8si)
4843 _mm256_setzero_si256 (),
4844 (__mmask8) __U);
4845}
4846
4847extern __inline __m128i
4848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4850{
4851 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4852 (__v4si) __B,
4853 (__v4si) __W,
4854 (__mmask8) __U);
4855}
4856
4857extern __inline __m128i
4858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4860{
4861 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4862 (__v4si) __B,
4863 (__v4si)
4864 _mm_setzero_si128 (),
4865 (__mmask8) __U);
4866}
4867
4868extern __inline __m256i
4869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4870_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4871 __m256i __B)
4872{
4873 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4874 (__v8si) __B,
4875 (__v8si) __W,
4876 (__mmask8) __U);
4877}
4878
4879extern __inline __m256i
4880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4881_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4882{
4883 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4884 (__v8si) __B,
4885 (__v8si)
4886 _mm256_setzero_si256 (),
4887 (__mmask8) __U);
4888}
4889
4890extern __inline __m128i
4891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4892_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4893 __m128i __B)
4894{
4895 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4896 (__v4si) __B,
4897 (__v4si) __W,
4898 (__mmask8) __U);
4899}
4900
4901extern __inline __m128i
4902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4904{
4905 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4906 (__v4si) __B,
4907 (__v4si)
4908 _mm_setzero_si128 (),
4909 (__mmask8) __U);
4910}
4911
4912extern __inline __m128
4913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4914_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4915{
4916 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4917 (__v4sf) __W,
4918 (__mmask8) __U);
4919}
4920
4921extern __inline __m128
4922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4924{
4925 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4926 (__v4sf)
4927 _mm_setzero_ps (),
4928 (__mmask8) __U);
4929}
4930
4931extern __inline __m128
4932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4933_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4934{
4935 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4936 (__v4sf) __W,
4937 (__mmask8) __U);
4938}
4939
4940extern __inline __m128
4941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4942_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4943{
4944 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4945 (__v4sf)
4946 _mm_setzero_ps (),
4947 (__mmask8) __U);
4948}
4949
4950extern __inline __m256i
4951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4952_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4953{
4954 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4955 (__v8si) __W,
4956 (__mmask8) __U);
4957}
4958
4959extern __inline __m256i
4960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4961_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4962{
4963 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4964 (__v8si)
4965 _mm256_setzero_si256 (),
4966 (__mmask8) __U);
4967}
4968
4969extern __inline __m128i
4970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4971_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
4972{
4973 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4974 (__v4si) __W,
4975 (__mmask8) __U);
4976}
4977
4978extern __inline __m128i
4979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4980_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
4981{
4982 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4983 (__v4si)
4984 _mm_setzero_si128 (),
4985 (__mmask8) __U);
4986}
4987
4988extern __inline __m256i
4989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4990_mm256_cvtps_epu32 (__m256 __A)
4991{
4992 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
4993 (__v8si)
4994 _mm256_setzero_si256 (),
4995 (__mmask8) -1);
4996}
4997
4998extern __inline __m256i
4999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5001{
5002 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5003 (__v8si) __W,
5004 (__mmask8) __U);
5005}
5006
5007extern __inline __m256i
5008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5009_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5010{
5011 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5012 (__v8si)
5013 _mm256_setzero_si256 (),
5014 (__mmask8) __U);
5015}
5016
5017extern __inline __m128i
5018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5019_mm_cvtps_epu32 (__m128 __A)
5020{
5021 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5022 (__v4si)
5023 _mm_setzero_si128 (),
5024 (__mmask8) -1);
5025}
5026
5027extern __inline __m128i
5028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5029_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5030{
5031 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5032 (__v4si) __W,
5033 (__mmask8) __U);
5034}
5035
5036extern __inline __m128i
5037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5038_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5039{
5040 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5041 (__v4si)
5042 _mm_setzero_si128 (),
5043 (__mmask8) __U);
5044}
5045
5046extern __inline __m256d
5047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5048_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5049{
5050 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5051 (__v4df) __W,
5052 (__mmask8) __U);
5053}
5054
5055extern __inline __m256d
5056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5057_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5058{
5059 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5060 (__v4df)
5061 _mm256_setzero_pd (),
5062 (__mmask8) __U);
5063}
5064
5065extern __inline __m128d
5066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5067_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5068{
5069 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5070 (__v2df) __W,
5071 (__mmask8) __U);
5072}
5073
5074extern __inline __m128d
5075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5076_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5077{
5078 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5079 (__v2df)
5080 _mm_setzero_pd (),
5081 (__mmask8) __U);
5082}
5083
5084extern __inline __m256
5085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5086_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5087{
5088 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5089 (__v8sf) __W,
5090 (__mmask8) __U);
5091}
5092
5093extern __inline __m256
5094__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5095_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5096{
5097 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5098 (__v8sf)
5099 _mm256_setzero_ps (),
5100 (__mmask8) __U);
5101}
5102
5103extern __inline __m128
5104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5106{
5107 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5108 (__v4sf) __W,
5109 (__mmask8) __U);
5110}
5111
5112extern __inline __m128
5113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5114_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5115{
5116 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5117 (__v4sf)
5118 _mm_setzero_ps (),
5119 (__mmask8) __U);
5120}
5121
5122extern __inline __m256
5123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5124_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5125{
5126 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5127 (__v8sf) __W,
5128 (__mmask8) __U);
5129}
5130
5131extern __inline __m256
5132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5134{
5135 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5136 (__v8sf)
5137 _mm256_setzero_ps (),
5138 (__mmask8) __U);
5139}
5140
5141extern __inline __m128
5142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5143_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5144{
5145 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5146 (__v4sf) __W,
5147 (__mmask8) __U);
5148}
5149
5150extern __inline __m128
5151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5152_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5153{
5154 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5155 (__v4sf)
5156 _mm_setzero_ps (),
5157 (__mmask8) __U);
5158}
5159
5160extern __inline __m128i
5161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5162_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5163 __m128i __B)
5164{
5165 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5166 (__v4si) __B,
5167 (__v4si) __W,
5168 (__mmask8) __U);
5169}
5170
5171extern __inline __m128i
5172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5173_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5174{
5175 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5176 (__v4si) __B,
5177 (__v4si)
5178 _mm_setzero_si128 (),
5179 (__mmask8) __U);
5180}
5181
5182extern __inline __m256i
5183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5185 __m256i __B)
5186{
5187 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5188 (__v8si) __B,
5189 (__v8si) __W,
5190 (__mmask8) __U);
5191}
5192
5193extern __inline __m256i
5194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5195_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5196{
5197 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5198 (__v8si) __B,
5199 (__v8si)
5200 _mm256_setzero_si256 (),
5201 (__mmask8) __U);
5202}
5203
5204extern __inline __m128i
5205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5207 __m128i __B)
5208{
5209 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5210 (__v2di) __B,
5211 (__v2di) __W,
5212 (__mmask8) __U);
5213}
5214
5215extern __inline __m128i
5216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5218{
5219 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5220 (__v2di) __B,
5221 (__v2di)
5222 _mm_setzero_di (),
5223 (__mmask8) __U);
5224}
5225
5226extern __inline __m256i
5227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5228_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5229 __m256i __B)
5230{
5231 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5232 (__v4di) __B,
5233 (__v4di) __W,
5234 (__mmask8) __U);
5235}
5236
5237extern __inline __m256i
5238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5239_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5240{
5241 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5242 (__v4di) __B,
5243 (__v4di)
5244 _mm256_setzero_si256 (),
5245 (__mmask8) __U);
5246}
5247
5248extern __inline __m128i
5249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5250_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5251 __m128i __B)
5252{
5253 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5254 (__v4si) __B,
5255 (__v4si) __W,
5256 (__mmask8) __U);
5257}
5258
5259extern __inline __m128i
5260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5261_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5262{
5263 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5264 (__v4si) __B,
5265 (__v4si)
5266 _mm_setzero_si128 (),
5267 (__mmask8) __U);
5268}
5269
5270extern __inline __m256i
5271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5272_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5273 __m256i __B)
5274{
5275 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5276 (__v8si) __B,
5277 (__v8si) __W,
5278 (__mmask8) __U);
5279}
5280
5281extern __inline __m256i
5282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5283_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5284{
5285 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5286 (__v8si) __B,
5287 (__v8si)
5288 _mm256_setzero_si256 (),
5289 (__mmask8) __U);
5290}
5291
5292extern __inline __m128i
5293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5294_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5295 __m128i __B)
5296{
5297 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5298 (__v2di) __B,
5299 (__v2di) __W,
5300 (__mmask8) __U);
5301}
5302
5303extern __inline __m128i
5304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5305_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5306{
5307 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5308 (__v2di) __B,
5309 (__v2di)
5310 _mm_setzero_di (),
5311 (__mmask8) __U);
5312}
5313
5314extern __inline __m256i
5315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5316_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5317 __m256i __B)
5318{
5319 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5320 (__v4di) __B,
5321 (__v4di) __W,
5322 (__mmask8) __U);
5323}
5324
5325extern __inline __m256i
5326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5327_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5328{
5329 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5330 (__v4di) __B,
5331 (__v4di)
5332 _mm256_setzero_si256 (),
5333 (__mmask8) __U);
5334}
5335
eee5d6f5
AI
5336extern __inline __mmask8
5337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5339{
5340 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5341 (__v4si) __B, 0,
5342 (__mmask8) -1);
5343}
5344
936c0fe4
AI
5345extern __inline __mmask8
5346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5347_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5348{
5349 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5350 (__v4si) __B,
5351 (__mmask8) -1);
5352}
5353
eee5d6f5
AI
5354extern __inline __mmask8
5355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5356_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5357{
5358 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5359 (__v4si) __B, 0, __U);
5360}
5361
936c0fe4
AI
5362extern __inline __mmask8
5363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5364_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5365{
5366 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5367 (__v4si) __B, __U);
5368}
5369
eee5d6f5
AI
5370extern __inline __mmask8
5371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5372_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5373{
5374 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5375 (__v8si) __B, 0,
5376 (__mmask8) -1);
5377}
5378
936c0fe4
AI
5379extern __inline __mmask8
5380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5382{
5383 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5384 (__v8si) __B,
5385 (__mmask8) -1);
5386}
5387
eee5d6f5
AI
5388extern __inline __mmask8
5389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5390_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5391{
5392 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5393 (__v8si) __B, 0, __U);
5394}
5395
936c0fe4
AI
5396extern __inline __mmask8
5397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5398_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5399{
5400 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5401 (__v8si) __B, __U);
5402}
5403
eee5d6f5
AI
5404extern __inline __mmask8
5405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5406_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5407{
5408 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5409 (__v2di) __B, 0,
5410 (__mmask8) -1);
5411}
5412
936c0fe4
AI
5413extern __inline __mmask8
5414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5415_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5416{
5417 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5418 (__v2di) __B,
5419 (__mmask8) -1);
5420}
5421
eee5d6f5
AI
5422extern __inline __mmask8
5423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5424_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5425{
5426 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5427 (__v2di) __B, 0, __U);
5428}
5429
936c0fe4
AI
5430extern __inline __mmask8
5431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5432_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5433{
5434 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5435 (__v2di) __B, __U);
5436}
5437
eee5d6f5
AI
5438extern __inline __mmask8
5439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5440_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5441{
5442 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5443 (__v4di) __B, 0,
5444 (__mmask8) -1);
5445}
5446
936c0fe4
AI
5447extern __inline __mmask8
5448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5450{
5451 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5452 (__v4di) __B,
5453 (__mmask8) -1);
5454}
5455
eee5d6f5
AI
5456extern __inline __mmask8
5457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5458_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5459{
5460 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5461 (__v4di) __B, 0, __U);
5462}
5463
936c0fe4
AI
5464extern __inline __mmask8
5465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5466_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5467{
5468 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5469 (__v4di) __B, __U);
5470}
5471
eee5d6f5
AI
5472extern __inline __mmask8
5473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5474_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5475{
5476 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5477 (__v4si) __B, 6,
5478 (__mmask8) -1);
5479}
5480
936c0fe4
AI
5481extern __inline __mmask8
5482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5483_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5484{
5485 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5486 (__v4si) __B,
5487 (__mmask8) -1);
5488}
5489
eee5d6f5
AI
5490extern __inline __mmask8
5491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5492_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5493{
5494 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5495 (__v4si) __B, 6, __U);
5496}
5497
936c0fe4
AI
5498extern __inline __mmask8
5499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5500_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5501{
5502 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5503 (__v4si) __B, __U);
5504}
5505
eee5d6f5
AI
5506extern __inline __mmask8
5507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5508_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5509{
5510 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5511 (__v8si) __B, 6,
5512 (__mmask8) -1);
5513}
5514
936c0fe4
AI
5515extern __inline __mmask8
5516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5517_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5518{
5519 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5520 (__v8si) __B,
5521 (__mmask8) -1);
5522}
5523
eee5d6f5
AI
5524extern __inline __mmask8
5525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5526_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5527{
5528 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5529 (__v8si) __B, 6, __U);
5530}
5531
936c0fe4
AI
5532extern __inline __mmask8
5533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5535{
5536 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5537 (__v8si) __B, __U);
5538}
5539
eee5d6f5
AI
5540extern __inline __mmask8
5541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5542_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5543{
5544 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5545 (__v2di) __B, 6,
5546 (__mmask8) -1);
5547}
5548
936c0fe4
AI
5549extern __inline __mmask8
5550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5552{
5553 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5554 (__v2di) __B,
5555 (__mmask8) -1);
5556}
5557
eee5d6f5
AI
5558extern __inline __mmask8
5559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5560_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5561{
5562 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5563 (__v2di) __B, 6, __U);
5564}
5565
936c0fe4
AI
5566extern __inline __mmask8
5567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5568_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5569{
5570 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5571 (__v2di) __B, __U);
5572}
5573
eee5d6f5
AI
5574extern __inline __mmask8
5575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5576_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5577{
5578 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5579 (__v4di) __B, 6,
5580 (__mmask8) -1);
5581}
5582
936c0fe4
AI
5583extern __inline __mmask8
5584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5586{
5587 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5588 (__v4di) __B,
5589 (__mmask8) -1);
5590}
5591
eee5d6f5
AI
5592extern __inline __mmask8
5593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5594_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5595{
5596 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5597 (__v4di) __B, 6, __U);
5598}
5599
936c0fe4
AI
5600extern __inline __mmask8
5601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5603{
5604 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5605 (__v4di) __B, __U);
5606}
5607
5608extern __inline __mmask8
5609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5610_mm_test_epi32_mask (__m128i __A, __m128i __B)
5611{
5612 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5613 (__v4si) __B,
5614 (__mmask8) -1);
5615}
5616
5617extern __inline __mmask8
5618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5619_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5620{
5621 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5622 (__v4si) __B, __U);
5623}
5624
5625extern __inline __mmask8
5626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5627_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5628{
5629 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5630 (__v8si) __B,
5631 (__mmask8) -1);
5632}
5633
5634extern __inline __mmask8
5635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5636_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5637{
5638 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5639 (__v8si) __B, __U);
5640}
5641
5642extern __inline __mmask8
5643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5644_mm_test_epi64_mask (__m128i __A, __m128i __B)
5645{
5646 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5647 (__v2di) __B,
5648 (__mmask8) -1);
5649}
5650
5651extern __inline __mmask8
5652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5653_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5654{
5655 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5656 (__v2di) __B, __U);
5657}
5658
5659extern __inline __mmask8
5660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5661_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5662{
5663 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5664 (__v4di) __B,
5665 (__mmask8) -1);
5666}
5667
5668extern __inline __mmask8
5669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5670_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5671{
5672 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5673 (__v4di) __B, __U);
5674}
5675
5676extern __inline __mmask8
5677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5679{
5680 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5681 (__v4si) __B,
5682 (__mmask8) -1);
5683}
5684
5685extern __inline __mmask8
5686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5688{
5689 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5690 (__v4si) __B, __U);
5691}
5692
5693extern __inline __mmask8
5694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5695_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5696{
5697 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5698 (__v8si) __B,
5699 (__mmask8) -1);
5700}
5701
5702extern __inline __mmask8
5703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5704_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5705{
5706 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5707 (__v8si) __B, __U);
5708}
5709
5710extern __inline __mmask8
5711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5712_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5713{
5714 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5715 (__v2di) __B,
5716 (__mmask8) -1);
5717}
5718
5719extern __inline __mmask8
5720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5721_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5722{
5723 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5724 (__v2di) __B, __U);
5725}
5726
5727extern __inline __mmask8
5728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5730{
5731 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5732 (__v4di) __B,
5733 (__mmask8) -1);
5734}
5735
5736extern __inline __mmask8
5737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5738_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5739{
5740 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5741 (__v4di) __B, __U);
5742}
5743
5744extern __inline __m256d
5745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5747{
5748 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5749 (__v4df) __W,
5750 (__mmask8) __U);
5751}
5752
5753extern __inline __m256d
5754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5755_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5756{
5757 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5758 (__v4df)
5759 _mm256_setzero_pd (),
5760 (__mmask8) __U);
5761}
5762
5763extern __inline void
5764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5765_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5766{
5767 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5768 (__v4df) __A,
5769 (__mmask8) __U);
5770}
5771
5772extern __inline __m128d
5773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5774_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5775{
5776 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5777 (__v2df) __W,
5778 (__mmask8) __U);
5779}
5780
5781extern __inline __m128d
5782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5783_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5784{
5785 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5786 (__v2df)
5787 _mm_setzero_pd (),
5788 (__mmask8) __U);
5789}
5790
5791extern __inline void
5792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5793_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5794{
5795 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5796 (__v2df) __A,
5797 (__mmask8) __U);
5798}
5799
5800extern __inline __m256
5801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5802_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5803{
5804 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5805 (__v8sf) __W,
5806 (__mmask8) __U);
5807}
5808
5809extern __inline __m256
5810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5811_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5812{
5813 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5814 (__v8sf)
5815 _mm256_setzero_ps (),
5816 (__mmask8) __U);
5817}
5818
5819extern __inline void
5820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5821_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5822{
5823 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5824 (__v8sf) __A,
5825 (__mmask8) __U);
5826}
5827
5828extern __inline __m128
5829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5830_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5831{
5832 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5833 (__v4sf) __W,
5834 (__mmask8) __U);
5835}
5836
5837extern __inline __m128
5838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5839_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5840{
5841 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5842 (__v4sf)
5843 _mm_setzero_ps (),
5844 (__mmask8) __U);
5845}
5846
5847extern __inline void
5848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5849_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5850{
5851 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5852 (__v4sf) __A,
5853 (__mmask8) __U);
5854}
5855
5856extern __inline __m256i
5857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5858_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5859{
5860 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5861 (__v4di) __W,
5862 (__mmask8) __U);
5863}
5864
5865extern __inline __m256i
5866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5868{
5869 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5870 (__v4di)
5871 _mm256_setzero_si256 (),
5872 (__mmask8) __U);
5873}
5874
5875extern __inline void
5876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5878{
5879 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5880 (__v4di) __A,
5881 (__mmask8) __U);
5882}
5883
5884extern __inline __m128i
5885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5886_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5887{
5888 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5889 (__v2di) __W,
5890 (__mmask8) __U);
5891}
5892
5893extern __inline __m128i
5894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5895_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5896{
5897 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5898 (__v2di)
5899 _mm_setzero_di (),
5900 (__mmask8) __U);
5901}
5902
5903extern __inline void
5904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5906{
5907 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5908 (__v2di) __A,
5909 (__mmask8) __U);
5910}
5911
5912extern __inline __m256i
5913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5914_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5915{
5916 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5917 (__v8si) __W,
5918 (__mmask8) __U);
5919}
5920
5921extern __inline __m256i
5922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5923_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5924{
5925 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5926 (__v8si)
5927 _mm256_setzero_si256 (),
5928 (__mmask8) __U);
5929}
5930
5931extern __inline void
5932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5934{
5935 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5936 (__v8si) __A,
5937 (__mmask8) __U);
5938}
5939
5940extern __inline __m128i
5941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5942_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5943{
5944 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5945 (__v4si) __W,
5946 (__mmask8) __U);
5947}
5948
5949extern __inline __m128i
5950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5951_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5952{
5953 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5954 (__v4si)
5955 _mm_setzero_si128 (),
5956 (__mmask8) __U);
5957}
5958
5959extern __inline void
5960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5961_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5962{
5963 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5964 (__v4si) __A,
5965 (__mmask8) __U);
5966}
5967
5968extern __inline __m256d
5969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5970_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
5971{
5972 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
5973 (__v4df) __W,
5974 (__mmask8) __U);
5975}
5976
5977extern __inline __m256d
5978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5979_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
5980{
5981 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
5982 (__v4df)
5983 _mm256_setzero_pd (),
5984 (__mmask8) __U);
5985}
5986
5987extern __inline __m256d
5988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5989_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5990{
5991 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
5992 (__v4df) __W,
5993 (__mmask8)
5994 __U);
5995}
5996
5997extern __inline __m256d
5998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5999_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6000{
6001 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6002 (__v4df)
6003 _mm256_setzero_pd (),
6004 (__mmask8)
6005 __U);
6006}
6007
6008extern __inline __m128d
6009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6010_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6011{
6012 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6013 (__v2df) __W,
6014 (__mmask8) __U);
6015}
6016
6017extern __inline __m128d
6018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6019_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6020{
6021 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6022 (__v2df)
6023 _mm_setzero_pd (),
6024 (__mmask8) __U);
6025}
6026
6027extern __inline __m128d
6028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6029_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6030{
6031 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6032 (__v2df) __W,
6033 (__mmask8)
6034 __U);
6035}
6036
6037extern __inline __m128d
6038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6040{
6041 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6042 (__v2df)
6043 _mm_setzero_pd (),
6044 (__mmask8)
6045 __U);
6046}
6047
6048extern __inline __m256
6049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6051{
6052 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6053 (__v8sf) __W,
6054 (__mmask8) __U);
6055}
6056
6057extern __inline __m256
6058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6059_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6060{
6061 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6062 (__v8sf)
6063 _mm256_setzero_ps (),
6064 (__mmask8) __U);
6065}
6066
6067extern __inline __m256
6068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6069_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6070{
6071 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6072 (__v8sf) __W,
6073 (__mmask8) __U);
6074}
6075
6076extern __inline __m256
6077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6078_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6079{
6080 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6081 (__v8sf)
6082 _mm256_setzero_ps (),
6083 (__mmask8)
6084 __U);
6085}
6086
6087extern __inline __m128
6088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6089_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6090{
6091 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6092 (__v4sf) __W,
6093 (__mmask8) __U);
6094}
6095
6096extern __inline __m128
6097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6098_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6099{
6100 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6101 (__v4sf)
6102 _mm_setzero_ps (),
6103 (__mmask8) __U);
6104}
6105
6106extern __inline __m128
6107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6108_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6109{
6110 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6111 (__v4sf) __W,
6112 (__mmask8) __U);
6113}
6114
6115extern __inline __m128
6116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6117_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6118{
6119 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6120 (__v4sf)
6121 _mm_setzero_ps (),
6122 (__mmask8)
6123 __U);
6124}
6125
6126extern __inline __m256i
6127__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6128_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6129{
6130 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6131 (__v4di) __W,
6132 (__mmask8) __U);
6133}
6134
6135extern __inline __m256i
6136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6137_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6138{
6139 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6140 (__v4di)
6141 _mm256_setzero_si256 (),
6142 (__mmask8) __U);
6143}
6144
6145extern __inline __m256i
6146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6147_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6148 void const *__P)
6149{
6150 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6151 (__v4di) __W,
6152 (__mmask8)
6153 __U);
6154}
6155
6156extern __inline __m256i
6157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6158_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6159{
6160 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6161 (__v4di)
6162 _mm256_setzero_si256 (),
6163 (__mmask8)
6164 __U);
6165}
6166
6167extern __inline __m128i
6168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6169_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6170{
6171 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6172 (__v2di) __W,
6173 (__mmask8) __U);
6174}
6175
6176extern __inline __m128i
6177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6178_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6179{
6180 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6181 (__v2di)
6182 _mm_setzero_si128 (),
6183 (__mmask8) __U);
6184}
6185
6186extern __inline __m128i
6187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6188_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6189{
6190 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6191 (__v2di) __W,
6192 (__mmask8)
6193 __U);
6194}
6195
6196extern __inline __m128i
6197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6198_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6199{
6200 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6201 (__v2di)
6202 _mm_setzero_si128 (),
6203 (__mmask8)
6204 __U);
6205}
6206
6207extern __inline __m256i
6208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6209_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6210{
6211 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6212 (__v8si) __W,
6213 (__mmask8) __U);
6214}
6215
6216extern __inline __m256i
6217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6218_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6219{
6220 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6221 (__v8si)
6222 _mm256_setzero_si256 (),
6223 (__mmask8) __U);
6224}
6225
6226extern __inline __m256i
6227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6228_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6229 void const *__P)
6230{
6231 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6232 (__v8si) __W,
6233 (__mmask8)
6234 __U);
6235}
6236
6237extern __inline __m256i
6238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6239_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6240{
6241 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6242 (__v8si)
6243 _mm256_setzero_si256 (),
6244 (__mmask8)
6245 __U);
6246}
6247
6248extern __inline __m128i
6249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6250_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6251{
6252 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6253 (__v4si) __W,
6254 (__mmask8) __U);
6255}
6256
6257extern __inline __m128i
6258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6259_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6260{
6261 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6262 (__v4si)
6263 _mm_setzero_si128 (),
6264 (__mmask8) __U);
6265}
6266
6267extern __inline __m128i
6268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6269_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6270{
6271 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6272 (__v4si) __W,
6273 (__mmask8)
6274 __U);
6275}
6276
6277extern __inline __m128i
6278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6279_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6280{
6281 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6282 (__v4si)
6283 _mm_setzero_si128 (),
6284 (__mmask8)
6285 __U);
6286}
6287
6288extern __inline __m256d
6289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6291{
6292 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6293 /* idx */ ,
6294 (__v4df) __A,
6295 (__v4df) __B,
6296 (__mmask8) -
6297 1);
6298}
6299
6300extern __inline __m256d
6301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6302_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6303 __m256d __B)
6304{
6305 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6306 /* idx */ ,
6307 (__v4df) __A,
6308 (__v4df) __B,
6309 (__mmask8)
6310 __U);
6311}
6312
6313extern __inline __m256d
6314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6315_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6316 __m256d __B)
6317{
6318 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6319 (__v4di) __I
6320 /* idx */ ,
6321 (__v4df) __B,
6322 (__mmask8)
6323 __U);
6324}
6325
6326extern __inline __m256d
6327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6328_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6329 __m256d __B)
6330{
6331 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6332 /* idx */ ,
6333 (__v4df) __A,
6334 (__v4df) __B,
6335 (__mmask8)
6336 __U);
6337}
6338
6339extern __inline __m256
6340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6341_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6342{
6343 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6344 /* idx */ ,
6345 (__v8sf) __A,
6346 (__v8sf) __B,
6347 (__mmask8) -1);
6348}
6349
6350extern __inline __m256
6351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6352_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6353 __m256 __B)
6354{
6355 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6356 /* idx */ ,
6357 (__v8sf) __A,
6358 (__v8sf) __B,
6359 (__mmask8) __U);
6360}
6361
6362extern __inline __m256
6363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6365 __m256 __B)
6366{
6367 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6368 (__v8si) __I
6369 /* idx */ ,
6370 (__v8sf) __B,
6371 (__mmask8) __U);
6372}
6373
6374extern __inline __m256
6375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6376_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6377 __m256 __B)
6378{
6379 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6380 /* idx */ ,
6381 (__v8sf) __A,
6382 (__v8sf) __B,
6383 (__mmask8)
6384 __U);
6385}
6386
6387extern __inline __m128i
6388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6390{
6391 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6392 /* idx */ ,
6393 (__v2di) __A,
6394 (__v2di) __B,
6395 (__mmask8) -1);
6396}
6397
6398extern __inline __m128i
6399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6400_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6401 __m128i __B)
6402{
6403 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6404 /* idx */ ,
6405 (__v2di) __A,
6406 (__v2di) __B,
6407 (__mmask8) __U);
6408}
6409
6410extern __inline __m128i
6411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6413 __m128i __B)
6414{
6415 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6416 (__v2di) __I
6417 /* idx */ ,
6418 (__v2di) __B,
6419 (__mmask8) __U);
6420}
6421
6422extern __inline __m128i
6423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6424_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6425 __m128i __B)
6426{
6427 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6428 /* idx */ ,
6429 (__v2di) __A,
6430 (__v2di) __B,
6431 (__mmask8)
6432 __U);
6433}
6434
6435extern __inline __m128i
6436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6437_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6438{
6439 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6440 /* idx */ ,
6441 (__v4si) __A,
6442 (__v4si) __B,
6443 (__mmask8) -1);
6444}
6445
6446extern __inline __m128i
6447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6448_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6449 __m128i __B)
6450{
6451 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6452 /* idx */ ,
6453 (__v4si) __A,
6454 (__v4si) __B,
6455 (__mmask8) __U);
6456}
6457
6458extern __inline __m128i
6459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6460_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6461 __m128i __B)
6462{
6463 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6464 (__v4si) __I
6465 /* idx */ ,
6466 (__v4si) __B,
6467 (__mmask8) __U);
6468}
6469
6470extern __inline __m128i
6471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6472_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6473 __m128i __B)
6474{
6475 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6476 /* idx */ ,
6477 (__v4si) __A,
6478 (__v4si) __B,
6479 (__mmask8)
6480 __U);
6481}
6482
6483extern __inline __m256i
6484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6485_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6486{
6487 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6488 /* idx */ ,
6489 (__v4di) __A,
6490 (__v4di) __B,
6491 (__mmask8) -1);
6492}
6493
6494extern __inline __m256i
6495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6496_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6497 __m256i __B)
6498{
6499 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6500 /* idx */ ,
6501 (__v4di) __A,
6502 (__v4di) __B,
6503 (__mmask8) __U);
6504}
6505
6506extern __inline __m256i
6507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6508_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6509 __mmask8 __U, __m256i __B)
6510{
6511 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6512 (__v4di) __I
6513 /* idx */ ,
6514 (__v4di) __B,
6515 (__mmask8) __U);
6516}
6517
6518extern __inline __m256i
6519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6520_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6521 __m256i __I, __m256i __B)
6522{
6523 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6524 /* idx */ ,
6525 (__v4di) __A,
6526 (__v4di) __B,
6527 (__mmask8)
6528 __U);
6529}
6530
6531extern __inline __m256i
6532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6533_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6534{
6535 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6536 /* idx */ ,
6537 (__v8si) __A,
6538 (__v8si) __B,
6539 (__mmask8) -1);
6540}
6541
6542extern __inline __m256i
6543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6544_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6545 __m256i __B)
6546{
6547 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6548 /* idx */ ,
6549 (__v8si) __A,
6550 (__v8si) __B,
6551 (__mmask8) __U);
6552}
6553
6554extern __inline __m256i
6555__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6556_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6557 __mmask8 __U, __m256i __B)
6558{
6559 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6560 (__v8si) __I
6561 /* idx */ ,
6562 (__v8si) __B,
6563 (__mmask8) __U);
6564}
6565
6566extern __inline __m256i
6567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6568_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6569 __m256i __I, __m256i __B)
6570{
6571 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6572 /* idx */ ,
6573 (__v8si) __A,
6574 (__v8si) __B,
6575 (__mmask8)
6576 __U);
6577}
6578
6579extern __inline __m128d
6580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6582{
6583 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6584 /* idx */ ,
6585 (__v2df) __A,
6586 (__v2df) __B,
6587 (__mmask8) -
6588 1);
6589}
6590
6591extern __inline __m128d
6592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6593_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6594 __m128d __B)
6595{
6596 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6597 /* idx */ ,
6598 (__v2df) __A,
6599 (__v2df) __B,
6600 (__mmask8)
6601 __U);
6602}
6603
6604extern __inline __m128d
6605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6606_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6607 __m128d __B)
6608{
6609 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6610 (__v2di) __I
6611 /* idx */ ,
6612 (__v2df) __B,
6613 (__mmask8)
6614 __U);
6615}
6616
6617extern __inline __m128d
6618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6619_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6620 __m128d __B)
6621{
6622 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6623 /* idx */ ,
6624 (__v2df) __A,
6625 (__v2df) __B,
6626 (__mmask8)
6627 __U);
6628}
6629
6630extern __inline __m128
6631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6632_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6633{
6634 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6635 /* idx */ ,
6636 (__v4sf) __A,
6637 (__v4sf) __B,
6638 (__mmask8) -1);
6639}
6640
6641extern __inline __m128
6642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6643_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6644 __m128 __B)
6645{
6646 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6647 /* idx */ ,
6648 (__v4sf) __A,
6649 (__v4sf) __B,
6650 (__mmask8) __U);
6651}
6652
6653extern __inline __m128
6654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6655_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6656 __m128 __B)
6657{
6658 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6659 (__v4si) __I
6660 /* idx */ ,
6661 (__v4sf) __B,
6662 (__mmask8) __U);
6663}
6664
6665extern __inline __m128
6666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6667_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6668 __m128 __B)
6669{
6670 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6671 /* idx */ ,
6672 (__v4sf) __A,
6673 (__v4sf) __B,
6674 (__mmask8)
6675 __U);
6676}
6677
6678extern __inline __m128i
6679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680_mm_srav_epi64 (__m128i __X, __m128i __Y)
6681{
6682 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6683 (__v2di) __Y,
6684 (__v2di)
6685 _mm_setzero_di (),
6686 (__mmask8) -1);
6687}
6688
6689extern __inline __m128i
6690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6691_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6692 __m128i __Y)
6693{
6694 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6695 (__v2di) __Y,
6696 (__v2di) __W,
6697 (__mmask8) __U);
6698}
6699
6700extern __inline __m128i
6701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6702_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6703{
6704 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6705 (__v2di) __Y,
6706 (__v2di)
6707 _mm_setzero_di (),
6708 (__mmask8) __U);
6709}
6710
6711extern __inline __m256i
6712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6713_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6714 __m256i __Y)
6715{
6716 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6717 (__v8si) __Y,
6718 (__v8si) __W,
6719 (__mmask8) __U);
6720}
6721
6722extern __inline __m256i
6723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6725{
6726 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6727 (__v8si) __Y,
6728 (__v8si)
6729 _mm256_setzero_si256 (),
6730 (__mmask8) __U);
6731}
6732
6733extern __inline __m128i
6734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6736 __m128i __Y)
6737{
6738 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6739 (__v4si) __Y,
6740 (__v4si) __W,
6741 (__mmask8) __U);
6742}
6743
6744extern __inline __m128i
6745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6747{
6748 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6749 (__v4si) __Y,
6750 (__v4si)
6751 _mm_setzero_si128 (),
6752 (__mmask8) __U);
6753}
6754
6755extern __inline __m256i
6756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6757_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6758 __m256i __Y)
6759{
6760 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6761 (__v4di) __Y,
6762 (__v4di) __W,
6763 (__mmask8) __U);
6764}
6765
6766extern __inline __m256i
6767__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6768_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6769{
6770 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6771 (__v4di) __Y,
6772 (__v4di)
6773 _mm256_setzero_si256 (),
6774 (__mmask8) __U);
6775}
6776
6777extern __inline __m128i
6778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6780 __m128i __Y)
6781{
6782 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6783 (__v2di) __Y,
6784 (__v2di) __W,
6785 (__mmask8) __U);
6786}
6787
6788extern __inline __m128i
6789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6791{
6792 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6793 (__v2di) __Y,
6794 (__v2di)
6795 _mm_setzero_di (),
6796 (__mmask8) __U);
6797}
6798
6799extern __inline __m256i
6800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6802 __m256i __Y)
6803{
6804 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6805 (__v8si) __Y,
6806 (__v8si) __W,
6807 (__mmask8) __U);
6808}
6809
6810extern __inline __m256i
6811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6813{
6814 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6815 (__v8si) __Y,
6816 (__v8si)
6817 _mm256_setzero_si256 (),
6818 (__mmask8) __U);
6819}
6820
6821extern __inline __m128i
6822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6823_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6824 __m128i __Y)
6825{
6826 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6827 (__v4si) __Y,
6828 (__v4si) __W,
6829 (__mmask8) __U);
6830}
6831
6832extern __inline __m128i
6833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6835{
6836 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6837 (__v4si) __Y,
6838 (__v4si)
6839 _mm_setzero_si128 (),
6840 (__mmask8) __U);
6841}
6842
6843extern __inline __m256i
6844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6846 __m256i __Y)
6847{
6848 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6849 (__v8si) __Y,
6850 (__v8si) __W,
6851 (__mmask8) __U);
6852}
6853
6854extern __inline __m256i
6855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6857{
6858 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6859 (__v8si) __Y,
6860 (__v8si)
6861 _mm256_setzero_si256 (),
6862 (__mmask8) __U);
6863}
6864
6865extern __inline __m128i
6866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6868 __m128i __Y)
6869{
6870 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6871 (__v4si) __Y,
6872 (__v4si) __W,
6873 (__mmask8) __U);
6874}
6875
6876extern __inline __m128i
6877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6879{
6880 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6881 (__v4si) __Y,
6882 (__v4si)
6883 _mm_setzero_si128 (),
6884 (__mmask8) __U);
6885}
6886
6887extern __inline __m256i
6888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6890 __m256i __Y)
6891{
6892 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6893 (__v4di) __Y,
6894 (__v4di) __W,
6895 (__mmask8) __U);
6896}
6897
6898extern __inline __m256i
6899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6901{
6902 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6903 (__v4di) __Y,
6904 (__v4di)
6905 _mm256_setzero_si256 (),
6906 (__mmask8) __U);
6907}
6908
6909extern __inline __m128i
6910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6912 __m128i __Y)
6913{
6914 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6915 (__v2di) __Y,
6916 (__v2di) __W,
6917 (__mmask8) __U);
6918}
6919
6920extern __inline __m128i
6921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6923{
6924 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6925 (__v2di) __Y,
6926 (__v2di)
6927 _mm_setzero_di (),
6928 (__mmask8) __U);
6929}
6930
6931extern __inline __m256i
6932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933_mm256_rolv_epi32 (__m256i __A, __m256i __B)
6934{
6935 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6936 (__v8si) __B,
6937 (__v8si)
6938 _mm256_setzero_si256 (),
6939 (__mmask8) -1);
6940}
6941
6942extern __inline __m256i
6943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6945 __m256i __B)
6946{
6947 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6948 (__v8si) __B,
6949 (__v8si) __W,
6950 (__mmask8) __U);
6951}
6952
6953extern __inline __m256i
6954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6956{
6957 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6958 (__v8si) __B,
6959 (__v8si)
6960 _mm256_setzero_si256 (),
6961 (__mmask8) __U);
6962}
6963
6964extern __inline __m128i
6965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966_mm_rolv_epi32 (__m128i __A, __m128i __B)
6967{
6968 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6969 (__v4si) __B,
6970 (__v4si)
6971 _mm_setzero_si128 (),
6972 (__mmask8) -1);
6973}
6974
6975extern __inline __m128i
6976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
6978 __m128i __B)
6979{
6980 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6981 (__v4si) __B,
6982 (__v4si) __W,
6983 (__mmask8) __U);
6984}
6985
6986extern __inline __m128i
6987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
6989{
6990 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6991 (__v4si) __B,
6992 (__v4si)
6993 _mm_setzero_si128 (),
6994 (__mmask8) __U);
6995}
6996
6997extern __inline __m256i
6998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999_mm256_rorv_epi32 (__m256i __A, __m256i __B)
7000{
7001 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7002 (__v8si) __B,
7003 (__v8si)
7004 _mm256_setzero_si256 (),
7005 (__mmask8) -1);
7006}
7007
7008extern __inline __m256i
7009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7011 __m256i __B)
7012{
7013 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7014 (__v8si) __B,
7015 (__v8si) __W,
7016 (__mmask8) __U);
7017}
7018
7019extern __inline __m256i
7020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7022{
7023 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7024 (__v8si) __B,
7025 (__v8si)
7026 _mm256_setzero_si256 (),
7027 (__mmask8) __U);
7028}
7029
7030extern __inline __m128i
7031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032_mm_rorv_epi32 (__m128i __A, __m128i __B)
7033{
7034 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7035 (__v4si) __B,
7036 (__v4si)
7037 _mm_setzero_si128 (),
7038 (__mmask8) -1);
7039}
7040
7041extern __inline __m128i
7042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7044 __m128i __B)
7045{
7046 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7047 (__v4si) __B,
7048 (__v4si) __W,
7049 (__mmask8) __U);
7050}
7051
7052extern __inline __m128i
7053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7055{
7056 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7057 (__v4si) __B,
7058 (__v4si)
7059 _mm_setzero_si128 (),
7060 (__mmask8) __U);
7061}
7062
7063extern __inline __m256i
7064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065_mm256_rolv_epi64 (__m256i __A, __m256i __B)
7066{
7067 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7068 (__v4di) __B,
7069 (__v4di)
7070 _mm256_setzero_si256 (),
7071 (__mmask8) -1);
7072}
7073
7074extern __inline __m256i
7075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7077 __m256i __B)
7078{
7079 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7080 (__v4di) __B,
7081 (__v4di) __W,
7082 (__mmask8) __U);
7083}
7084
7085extern __inline __m256i
7086__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7088{
7089 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7090 (__v4di) __B,
7091 (__v4di)
7092 _mm256_setzero_si256 (),
7093 (__mmask8) __U);
7094}
7095
7096extern __inline __m128i
7097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098_mm_rolv_epi64 (__m128i __A, __m128i __B)
7099{
7100 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7101 (__v2di) __B,
7102 (__v2di)
7103 _mm_setzero_di (),
7104 (__mmask8) -1);
7105}
7106
7107extern __inline __m128i
7108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7110 __m128i __B)
7111{
7112 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7113 (__v2di) __B,
7114 (__v2di) __W,
7115 (__mmask8) __U);
7116}
7117
7118extern __inline __m128i
7119__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7121{
7122 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7123 (__v2di) __B,
7124 (__v2di)
7125 _mm_setzero_di (),
7126 (__mmask8) __U);
7127}
7128
7129extern __inline __m256i
7130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131_mm256_rorv_epi64 (__m256i __A, __m256i __B)
7132{
7133 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7134 (__v4di) __B,
7135 (__v4di)
7136 _mm256_setzero_si256 (),
7137 (__mmask8) -1);
7138}
7139
7140extern __inline __m256i
7141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7143 __m256i __B)
7144{
7145 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7146 (__v4di) __B,
7147 (__v4di) __W,
7148 (__mmask8) __U);
7149}
7150
7151extern __inline __m256i
7152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7153_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7154{
7155 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7156 (__v4di) __B,
7157 (__v4di)
7158 _mm256_setzero_si256 (),
7159 (__mmask8) __U);
7160}
7161
7162extern __inline __m128i
7163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164_mm_rorv_epi64 (__m128i __A, __m128i __B)
7165{
7166 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7167 (__v2di) __B,
7168 (__v2di)
7169 _mm_setzero_di (),
7170 (__mmask8) -1);
7171}
7172
7173extern __inline __m128i
7174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7176 __m128i __B)
7177{
7178 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7179 (__v2di) __B,
7180 (__v2di) __W,
7181 (__mmask8) __U);
7182}
7183
7184extern __inline __m128i
7185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7187{
7188 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7189 (__v2di) __B,
7190 (__v2di)
7191 _mm_setzero_di (),
7192 (__mmask8) __U);
7193}
7194
7195extern __inline __m256i
7196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7197_mm256_srav_epi64 (__m256i __X, __m256i __Y)
7198{
7199 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7200 (__v4di) __Y,
7201 (__v4di)
7202 _mm256_setzero_si256 (),
7203 (__mmask8) -1);
7204}
7205
7206extern __inline __m256i
7207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7208_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7209 __m256i __Y)
7210{
7211 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7212 (__v4di) __Y,
7213 (__v4di) __W,
7214 (__mmask8) __U);
7215}
7216
7217extern __inline __m256i
7218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7220{
7221 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7222 (__v4di) __Y,
7223 (__v4di)
7224 _mm256_setzero_si256 (),
7225 (__mmask8) __U);
7226}
7227
7228extern __inline __m256i
7229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7230_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7231 __m256i __B)
7232{
7233 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7234 (__v4di) __B,
7235 (__v4di) __W, __U);
7236}
7237
7238extern __inline __m256i
7239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7240_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7241{
7242 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7243 (__v4di) __B,
7244 (__v4di)
7245 _mm256_setzero_pd (),
7246 __U);
7247}
7248
7249extern __inline __m128i
7250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7251_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7252 __m128i __B)
7253{
7254 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7255 (__v2di) __B,
7256 (__v2di) __W, __U);
7257}
7258
7259extern __inline __m128i
7260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7261_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7262{
7263 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7264 (__v2di) __B,
7265 (__v2di)
7266 _mm_setzero_pd (),
7267 __U);
7268}
7269
7270extern __inline __m256i
7271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7272_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7273 __m256i __B)
7274{
7275 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7276 (__v4di) __B,
7277 (__v4di) __W, __U);
7278}
7279
7280extern __inline __m256i
7281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7282_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7283{
7284 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7285 (__v4di) __B,
7286 (__v4di)
7287 _mm256_setzero_pd (),
7288 __U);
7289}
7290
7291extern __inline __m128i
7292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7294 __m128i __B)
7295{
7296 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7297 (__v2di) __B,
7298 (__v2di) __W, __U);
7299}
7300
7301extern __inline __m128i
7302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7303_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7304{
7305 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7306 (__v2di) __B,
7307 (__v2di)
7308 _mm_setzero_pd (),
7309 __U);
7310}
7311
7312extern __inline __m256i
7313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7314_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7315 __m256i __B)
7316{
7317 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7318 (__v4di) __B,
7319 (__v4di) __W,
7320 (__mmask8) __U);
7321}
7322
7323extern __inline __m256i
7324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7325_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7326{
7327 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7328 (__v4di) __B,
7329 (__v4di)
7330 _mm256_setzero_si256 (),
7331 (__mmask8) __U);
7332}
7333
7334extern __inline __m128i
7335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7337{
7338 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7339 (__v2di) __B,
7340 (__v2di) __W,
7341 (__mmask8) __U);
7342}
7343
7344extern __inline __m128i
7345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7346_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7347{
7348 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7349 (__v2di) __B,
7350 (__v2di)
7351 _mm_setzero_si128 (),
7352 (__mmask8) __U);
7353}
7354
7355extern __inline __m256i
7356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7357_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7358 __m256i __B)
7359{
7360 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7361 (__v4di) __B,
7362 (__v4di) __W,
7363 (__mmask8) __U);
7364}
7365
7366extern __inline __m256i
7367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7368_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7369{
7370 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7371 (__v4di) __B,
7372 (__v4di)
7373 _mm256_setzero_si256 (),
7374 (__mmask8) __U);
7375}
7376
7377extern __inline __m128i
7378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7379_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7380 __m128i __B)
7381{
7382 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7383 (__v2di) __B,
7384 (__v2di) __W,
7385 (__mmask8) __U);
7386}
7387
7388extern __inline __m128i
7389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7390_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7391{
7392 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7393 (__v2di) __B,
7394 (__v2di)
7395 _mm_setzero_si128 (),
7396 (__mmask8) __U);
7397}
7398
7399extern __inline __m256d
7400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7401_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7402 __m256d __B)
7403{
7404 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7405 (__v4df) __B,
7406 (__v4df) __W,
7407 (__mmask8) __U);
7408}
7409
7410extern __inline __m256d
7411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7413{
7414 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7415 (__v4df) __B,
7416 (__v4df)
7417 _mm256_setzero_pd (),
7418 (__mmask8) __U);
7419}
7420
7421extern __inline __m256
7422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7423_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7424{
7425 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7426 (__v8sf) __B,
7427 (__v8sf) __W,
7428 (__mmask8) __U);
7429}
7430
7431extern __inline __m256
7432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7433_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7434{
7435 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7436 (__v8sf) __B,
7437 (__v8sf)
7438 _mm256_setzero_ps (),
7439 (__mmask8) __U);
7440}
7441
7442extern __inline __m128
7443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7444_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7445{
7446 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7447 (__v4sf) __B,
7448 (__v4sf) __W,
7449 (__mmask8) __U);
7450}
7451
7452extern __inline __m128
7453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7454_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7455{
7456 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7457 (__v4sf) __B,
7458 (__v4sf)
7459 _mm_setzero_ps (),
7460 (__mmask8) __U);
7461}
7462
7463extern __inline __m128d
7464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7465_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7466{
7467 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7468 (__v2df) __B,
7469 (__v2df) __W,
7470 (__mmask8) __U);
7471}
7472
7473extern __inline __m128d
7474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7475_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7476{
7477 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7478 (__v2df) __B,
7479 (__v2df)
7480 _mm_setzero_pd (),
7481 (__mmask8) __U);
7482}
7483
7484extern __inline __m256d
7485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7486_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7487 __m256d __B)
7488{
7489 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7490 (__v4df) __B,
7491 (__v4df) __W,
7492 (__mmask8) __U);
7493}
7494
7495extern __inline __m256d
7496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7497_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7498 __m256d __B)
7499{
7500 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7501 (__v4df) __B,
7502 (__v4df) __W,
7503 (__mmask8) __U);
7504}
7505
7506extern __inline __m256d
7507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7509{
7510 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7511 (__v4df) __B,
7512 (__v4df)
7513 _mm256_setzero_pd (),
7514 (__mmask8) __U);
7515}
7516
7517extern __inline __m256
7518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7519_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7520{
7521 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7522 (__v8sf) __B,
7523 (__v8sf) __W,
7524 (__mmask8) __U);
7525}
7526
7527extern __inline __m256d
7528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7529_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7530{
7531 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7532 (__v4df) __B,
7533 (__v4df)
7534 _mm256_setzero_pd (),
7535 (__mmask8) __U);
7536}
7537
7538extern __inline __m256
7539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7540_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7541{
7542 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7543 (__v8sf) __B,
7544 (__v8sf) __W,
7545 (__mmask8) __U);
7546}
7547
7548extern __inline __m256
7549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7550_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7551{
7552 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7553 (__v8sf) __B,
7554 (__v8sf)
7555 _mm256_setzero_ps (),
7556 (__mmask8) __U);
7557}
7558
7559extern __inline __m256
7560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7561_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7562{
7563 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7564 (__v8sf) __B,
7565 (__v8sf)
7566 _mm256_setzero_ps (),
7567 (__mmask8) __U);
7568}
7569
7570extern __inline __m128
7571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7572_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7573{
7574 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7575 (__v4sf) __B,
7576 (__v4sf) __W,
7577 (__mmask8) __U);
7578}
7579
7580extern __inline __m128
7581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7583{
7584 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7585 (__v4sf) __B,
7586 (__v4sf) __W,
7587 (__mmask8) __U);
7588}
7589
7590extern __inline __m128
7591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7592_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7593{
7594 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7595 (__v4sf) __B,
7596 (__v4sf)
7597 _mm_setzero_ps (),
7598 (__mmask8) __U);
7599}
7600
7601extern __inline __m128
7602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7603_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7604{
7605 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7606 (__v4sf) __B,
7607 (__v4sf)
7608 _mm_setzero_ps (),
7609 (__mmask8) __U);
7610}
7611
7612extern __inline __m128
7613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7614_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7615{
7616 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7617 (__v4sf) __B,
7618 (__v4sf) __W,
7619 (__mmask8) __U);
7620}
7621
7622extern __inline __m128
7623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7624_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7625{
7626 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7627 (__v4sf) __B,
7628 (__v4sf)
7629 _mm_setzero_ps (),
7630 (__mmask8) __U);
7631}
7632
7633extern __inline __m128d
7634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7635_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7636{
7637 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7638 (__v2df) __B,
7639 (__v2df) __W,
7640 (__mmask8) __U);
7641}
7642
7643extern __inline __m128d
7644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7645_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7646{
7647 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7648 (__v2df) __B,
7649 (__v2df)
7650 _mm_setzero_pd (),
7651 (__mmask8) __U);
7652}
7653
7654extern __inline __m128d
7655__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7656_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7657{
7658 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7659 (__v2df) __B,
7660 (__v2df) __W,
7661 (__mmask8) __U);
7662}
7663
7664extern __inline __m128d
7665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7667{
7668 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7669 (__v2df) __B,
7670 (__v2df)
7671 _mm_setzero_pd (),
7672 (__mmask8) __U);
7673}
7674
7675extern __inline __m128d
7676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7677_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7678{
7679 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7680 (__v2df) __B,
7681 (__v2df) __W,
7682 (__mmask8) __U);
7683}
7684
7685extern __inline __m128d
7686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7687_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7688{
7689 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7690 (__v2df) __B,
7691 (__v2df)
7692 _mm_setzero_pd (),
7693 (__mmask8) __U);
7694}
7695
7696extern __inline __m256
7697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7698_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7699{
7700 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7701 (__v8sf) __B,
7702 (__v8sf) __W,
7703 (__mmask8) __U);
7704}
7705
7706extern __inline __m256
7707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7708_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7709{
7710 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7711 (__v8sf) __B,
7712 (__v8sf)
7713 _mm256_setzero_ps (),
7714 (__mmask8) __U);
7715}
7716
7717extern __inline __m256d
7718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7719_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7720 __m256d __B)
7721{
7722 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7723 (__v4df) __B,
7724 (__v4df) __W,
7725 (__mmask8) __U);
7726}
7727
7728extern __inline __m256d
7729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7731{
7732 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7733 (__v4df) __B,
7734 (__v4df)
7735 _mm256_setzero_pd (),
7736 (__mmask8) __U);
7737}
7738
7739extern __inline __m256i
7740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7742{
7743 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7744 (__v4di) __B,
7745 (__v4di)
7746 _mm256_setzero_si256 (),
7747 __M);
7748}
7749
7750extern __inline __m256i
7751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7752_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7753 __m256i __B)
7754{
7755 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7756 (__v4di) __B,
7757 (__v4di) __W, __M);
7758}
7759
7760extern __inline __m256i
7761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7762_mm256_min_epi64 (__m256i __A, __m256i __B)
7763{
7764 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7765 (__v4di) __B,
7766 (__v4di)
7767 _mm256_setzero_si256 (),
7768 (__mmask8) -1);
7769}
7770
7771extern __inline __m256i
7772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7773_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7774 __m256i __B)
7775{
7776 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7777 (__v4di) __B,
7778 (__v4di) __W, __M);
7779}
7780
7781extern __inline __m256i
7782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7783_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7784{
7785 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7786 (__v4di) __B,
7787 (__v4di)
7788 _mm256_setzero_si256 (),
7789 __M);
7790}
7791
7792extern __inline __m256i
7793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7794_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7795{
7796 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7797 (__v4di) __B,
7798 (__v4di)
7799 _mm256_setzero_si256 (),
7800 __M);
7801}
7802
7803extern __inline __m256i
7804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7805_mm256_max_epi64 (__m256i __A, __m256i __B)
7806{
7807 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7808 (__v4di) __B,
7809 (__v4di)
7810 _mm256_setzero_si256 (),
7811 (__mmask8) -1);
7812}
7813
7814extern __inline __m256i
7815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7816_mm256_max_epu64 (__m256i __A, __m256i __B)
7817{
7818 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7819 (__v4di) __B,
7820 (__v4di)
7821 _mm256_setzero_si256 (),
7822 (__mmask8) -1);
7823}
7824
7825extern __inline __m256i
7826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7827_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7828 __m256i __B)
7829{
7830 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7831 (__v4di) __B,
7832 (__v4di) __W, __M);
7833}
7834
7835extern __inline __m256i
7836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837_mm256_min_epu64 (__m256i __A, __m256i __B)
7838{
7839 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7840 (__v4di) __B,
7841 (__v4di)
7842 _mm256_setzero_si256 (),
7843 (__mmask8) -1);
7844}
7845
7846extern __inline __m256i
7847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7849 __m256i __B)
7850{
7851 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7852 (__v4di) __B,
7853 (__v4di) __W, __M);
7854}
7855
7856extern __inline __m256i
7857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7858_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7859{
7860 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7861 (__v4di) __B,
7862 (__v4di)
7863 _mm256_setzero_si256 (),
7864 __M);
7865}
7866
7867extern __inline __m256i
7868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7869_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7870{
7871 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7872 (__v8si) __B,
7873 (__v8si)
7874 _mm256_setzero_si256 (),
7875 __M);
7876}
7877
7878extern __inline __m256i
7879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7880_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7881 __m256i __B)
7882{
7883 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7884 (__v8si) __B,
7885 (__v8si) __W, __M);
7886}
7887
7888extern __inline __m256i
7889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7891{
7892 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7893 (__v8si) __B,
7894 (__v8si)
7895 _mm256_setzero_si256 (),
7896 __M);
7897}
7898
7899extern __inline __m256i
7900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7901_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7902 __m256i __B)
7903{
7904 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7905 (__v8si) __B,
7906 (__v8si) __W, __M);
7907}
7908
7909extern __inline __m256i
7910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7911_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7912{
7913 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7914 (__v8si) __B,
7915 (__v8si)
7916 _mm256_setzero_si256 (),
7917 __M);
7918}
7919
7920extern __inline __m256i
7921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7922_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7923 __m256i __B)
7924{
7925 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7926 (__v8si) __B,
7927 (__v8si) __W, __M);
7928}
7929
7930extern __inline __m256i
7931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7932_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7933{
7934 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7935 (__v8si) __B,
7936 (__v8si)
7937 _mm256_setzero_si256 (),
7938 __M);
7939}
7940
7941extern __inline __m256i
7942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7943_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7944 __m256i __B)
7945{
7946 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7947 (__v8si) __B,
7948 (__v8si) __W, __M);
7949}
7950
7951extern __inline __m128i
7952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7953_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7954{
7955 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7956 (__v2di) __B,
7957 (__v2di)
7958 _mm_setzero_si128 (),
7959 __M);
7960}
7961
7962extern __inline __m128i
7963__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7964_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7965 __m128i __B)
7966{
7967 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7968 (__v2di) __B,
7969 (__v2di) __W, __M);
7970}
7971
7972extern __inline __m128i
7973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7974_mm_min_epi64 (__m128i __A, __m128i __B)
7975{
7976 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7977 (__v2di) __B,
7978 (__v2di)
7979 _mm_setzero_di (),
7980 (__mmask8) -1);
7981}
7982
7983extern __inline __m128i
7984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7985_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7986 __m128i __B)
7987{
7988 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7989 (__v2di) __B,
7990 (__v2di) __W, __M);
7991}
7992
7993extern __inline __m128i
7994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7995_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7996{
7997 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7998 (__v2di) __B,
7999 (__v2di)
8000 _mm_setzero_si128 (),
8001 __M);
8002}
8003
8004extern __inline __m128i
8005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8006_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8007{
8008 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8009 (__v2di) __B,
8010 (__v2di)
8011 _mm_setzero_si128 (),
8012 __M);
8013}
8014
8015extern __inline __m128i
8016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8017_mm_max_epi64 (__m128i __A, __m128i __B)
8018{
8019 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8020 (__v2di) __B,
8021 (__v2di)
8022 _mm_setzero_di (),
8023 (__mmask8) -1);
8024}
8025
8026extern __inline __m128i
8027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8028_mm_max_epu64 (__m128i __A, __m128i __B)
8029{
8030 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8031 (__v2di) __B,
8032 (__v2di)
8033 _mm_setzero_di (),
8034 (__mmask8) -1);
8035}
8036
8037extern __inline __m128i
8038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8039_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8040 __m128i __B)
8041{
8042 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8043 (__v2di) __B,
8044 (__v2di) __W, __M);
8045}
8046
8047extern __inline __m128i
8048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8049_mm_min_epu64 (__m128i __A, __m128i __B)
8050{
8051 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8052 (__v2di) __B,
8053 (__v2di)
8054 _mm_setzero_di (),
8055 (__mmask8) -1);
8056}
8057
8058extern __inline __m128i
8059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8060_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8061 __m128i __B)
8062{
8063 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8064 (__v2di) __B,
8065 (__v2di) __W, __M);
8066}
8067
8068extern __inline __m128i
8069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8070_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8071{
8072 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8073 (__v2di) __B,
8074 (__v2di)
8075 _mm_setzero_si128 (),
8076 __M);
8077}
8078
8079extern __inline __m128i
8080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8081_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8082{
8083 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8084 (__v4si) __B,
8085 (__v4si)
8086 _mm_setzero_si128 (),
8087 __M);
8088}
8089
8090extern __inline __m128i
8091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8092_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8093 __m128i __B)
8094{
8095 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8096 (__v4si) __B,
8097 (__v4si) __W, __M);
8098}
8099
8100extern __inline __m128i
8101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8103{
8104 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8105 (__v4si) __B,
8106 (__v4si)
8107 _mm_setzero_si128 (),
8108 __M);
8109}
8110
8111extern __inline __m128i
8112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8113_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8114 __m128i __B)
8115{
8116 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8117 (__v4si) __B,
8118 (__v4si) __W, __M);
8119}
8120
8121extern __inline __m128i
8122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8123_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8124{
8125 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8126 (__v4si) __B,
8127 (__v4si)
8128 _mm_setzero_si128 (),
8129 __M);
8130}
8131
8132extern __inline __m128i
8133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8134_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8135 __m128i __B)
8136{
8137 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8138 (__v4si) __B,
8139 (__v4si) __W, __M);
8140}
8141
8142extern __inline __m128i
8143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8144_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8145{
8146 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8147 (__v4si) __B,
8148 (__v4si)
8149 _mm_setzero_si128 (),
8150 __M);
8151}
8152
8153extern __inline __m128i
8154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8156 __m128i __B)
8157{
8158 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8159 (__v4si) __B,
8160 (__v4si) __W, __M);
8161}
8162
/* When __AVX512CD__ is not defined, save the current target options and
   switch to "avx512vl,avx512cd" for the definitions that follow.
   __DISABLE_AVX512VLCD__ is defined only on this path, so it records that
   options were pushed here.
   NOTE(review): presumably a matching "#pragma GCC pop_options" guarded by
   __DISABLE_AVX512VLCD__ appears later in the file -- confirm.  */
8163#ifndef __AVX512CD__
8164#pragma GCC push_options
8165#pragma GCC target("avx512vl,avx512cd")
8166#define __DISABLE_AVX512VLCD__
8167#endif
8168
8169extern __inline __m128i
8170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8171_mm_broadcastmb_epi64 (__mmask8 __A)
8172{
8173 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8174}
8175
8176extern __inline __m256i
8177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8178_mm256_broadcastmb_epi64 (__mmask8 __A)
8179{
8180 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8181}
8182
8183extern __inline __m128i
8184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8185_mm_broadcastmw_epi32 (__mmask16 __A)
8186{
8187 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8188}
8189
8190extern __inline __m256i
8191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8192_mm256_broadcastmw_epi32 (__mmask16 __A)
8193{
8194 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8195}
8196
8197extern __inline __m256i
8198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8199_mm256_lzcnt_epi32 (__m256i __A)
8200{
8201 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8202 (__v8si)
8203 _mm256_setzero_si256 (),
8204 (__mmask8) -1);
8205}
8206
8207extern __inline __m256i
8208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8209_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8210{
8211 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8212 (__v8si) __W,
8213 (__mmask8) __U);
8214}
8215
8216extern __inline __m256i
8217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8218_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8219{
8220 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8221 (__v8si)
8222 _mm256_setzero_si256 (),
8223 (__mmask8) __U);
8224}
8225
8226extern __inline __m256i
8227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228_mm256_lzcnt_epi64 (__m256i __A)
8229{
8230 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8231 (__v4di)
8232 _mm256_setzero_si256 (),
8233 (__mmask8) -1);
8234}
8235
8236extern __inline __m256i
8237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8238_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8239{
8240 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8241 (__v4di) __W,
8242 (__mmask8) __U);
8243}
8244
8245extern __inline __m256i
8246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8247_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8248{
8249 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8250 (__v4di)
8251 _mm256_setzero_si256 (),
8252 (__mmask8) __U);
8253}
8254
8255extern __inline __m256i
8256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8257_mm256_conflict_epi64 (__m256i __A)
8258{
8259 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8260 (__v4di)
8261 _mm256_setzero_si256 (),
8262 (__mmask8) -
8263 1);
8264}
8265
8266extern __inline __m256i
8267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8268_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8269{
8270 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8271 (__v4di) __W,
8272 (__mmask8)
8273 __U);
8274}
8275
8276extern __inline __m256i
8277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8278_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8279{
8280 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8281 (__v4di)
8282 _mm256_setzero_si256 (),
8283 (__mmask8)
8284 __U);
8285}
8286
8287extern __inline __m256i
8288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8289_mm256_conflict_epi32 (__m256i __A)
8290{
8291 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8292 (__v8si)
8293 _mm256_setzero_si256 (),
8294 (__mmask8) -
8295 1);
8296}
8297
8298extern __inline __m256i
8299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8300_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8301{
8302 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8303 (__v8si) __W,
8304 (__mmask8)
8305 __U);
8306}
8307
8308extern __inline __m256i
8309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8310_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8311{
8312 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8313 (__v8si)
8314 _mm256_setzero_si256 (),
8315 (__mmask8)
8316 __U);
8317}
8318
8319extern __inline __m128i
8320__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8321_mm_lzcnt_epi32 (__m128i __A)
8322{
8323 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8324 (__v4si)
8325 _mm_setzero_si128 (),
8326 (__mmask8) -1);
8327}
8328
8329extern __inline __m128i
8330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8331_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8332{
8333 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8334 (__v4si) __W,
8335 (__mmask8) __U);
8336}
8337
8338extern __inline __m128i
8339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8340_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8341{
8342 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8343 (__v4si)
8344 _mm_setzero_si128 (),
8345 (__mmask8) __U);
8346}
8347
8348extern __inline __m128i
8349__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8350_mm_lzcnt_epi64 (__m128i __A)
8351{
8352 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8353 (__v2di)
8354 _mm_setzero_di (),
8355 (__mmask8) -1);
8356}
8357
8358extern __inline __m128i
8359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8360_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8361{
8362 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8363 (__v2di) __W,
8364 (__mmask8) __U);
8365}
8366
8367extern __inline __m128i
8368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8369_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8370{
8371 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8372 (__v2di)
8373 _mm_setzero_di (),
8374 (__mmask8) __U);
8375}
8376
8377extern __inline __m128i
8378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8379_mm_conflict_epi64 (__m128i __A)
8380{
8381 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8382 (__v2di)
8383 _mm_setzero_di (),
8384 (__mmask8) -
8385 1);
8386}
8387
8388extern __inline __m128i
8389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8390_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8391{
8392 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8393 (__v2di) __W,
8394 (__mmask8)
8395 __U);
8396}
8397
8398extern __inline __m128i
8399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8400_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8401{
8402 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8403 (__v2di)
8404 _mm_setzero_di (),
8405 (__mmask8)
8406 __U);
8407}
8408
8409extern __inline __m128i
8410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8411_mm_conflict_epi32 (__m128i __A)
8412{
8413 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8414 (__v4si)
8415 _mm_setzero_si128 (),
8416 (__mmask8) -
8417 1);
8418}
8419
8420extern __inline __m128i
8421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8422_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8423{
8424 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8425 (__v4si) __W,
8426 (__mmask8)
8427 __U);
8428}
8429
8430extern __inline __m128i
8431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8432_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8433{
8434 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8435 (__v4si)
8436 _mm_setzero_si128 (),
8437 (__mmask8)
8438 __U);
8439}
8440
8441#ifdef __DISABLE_AVX512VLCD__
8442#pragma GCC pop_options
8443#endif
8444
8445extern __inline __m256d
8446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8447_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8448 __m256d __B)
8449{
8450 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8451 (__v4df) __B,
8452 (__v4df) __W,
8453 (__mmask8) __U);
8454}
8455
8456extern __inline __m256d
8457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8458_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8459{
8460 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8461 (__v4df) __B,
8462 (__v4df)
8463 _mm256_setzero_pd (),
8464 (__mmask8) __U);
8465}
8466
8467extern __inline __m128d
8468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8470 __m128d __B)
8471{
8472 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8473 (__v2df) __B,
8474 (__v2df) __W,
8475 (__mmask8) __U);
8476}
8477
8478extern __inline __m128d
8479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8480_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8481{
8482 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8483 (__v2df) __B,
8484 (__v2df)
8485 _mm_setzero_pd (),
8486 (__mmask8) __U);
8487}
8488
8489extern __inline __m256
8490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8491_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8492 __m256 __B)
8493{
8494 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8495 (__v8sf) __B,
8496 (__v8sf) __W,
8497 (__mmask8) __U);
8498}
8499
8500extern __inline __m256d
8501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8502_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8503 __m256d __B)
8504{
8505 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8506 (__v4df) __B,
8507 (__v4df) __W,
8508 (__mmask8) __U);
8509}
8510
8511extern __inline __m256d
8512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8513_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8514{
8515 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8516 (__v4df) __B,
8517 (__v4df)
8518 _mm256_setzero_pd (),
8519 (__mmask8) __U);
8520}
8521
8522extern __inline __m128d
8523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8524_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8525 __m128d __B)
8526{
8527 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8528 (__v2df) __B,
8529 (__v2df) __W,
8530 (__mmask8) __U);
8531}
8532
8533extern __inline __m128d
8534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8535_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8536{
8537 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8538 (__v2df) __B,
8539 (__v2df)
8540 _mm_setzero_pd (),
8541 (__mmask8) __U);
8542}
8543
8544extern __inline __m256
8545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8546_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8547 __m256 __B)
8548{
8549 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8550 (__v8sf) __B,
8551 (__v8sf) __W,
8552 (__mmask8) __U);
8553}
8554
8555extern __inline __m256
8556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8557_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8558{
8559 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8560 (__v8sf) __B,
8561 (__v8sf)
8562 _mm256_setzero_ps (),
8563 (__mmask8) __U);
8564}
8565
8566extern __inline __m128
8567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8568_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8569{
8570 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8571 (__v4sf) __B,
8572 (__v4sf) __W,
8573 (__mmask8) __U);
8574}
8575
8576extern __inline __m128
8577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8578_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8579{
8580 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8581 (__v4sf) __B,
8582 (__v4sf)
8583 _mm_setzero_ps (),
8584 (__mmask8) __U);
8585}
8586
8587extern __inline __m128
8588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8589_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8590{
8591 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8592 (__v4sf) __W,
8593 (__mmask8) __U);
8594}
8595
8596extern __inline __m128
8597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8598_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8599{
8600 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8601 (__v4sf)
8602 _mm_setzero_ps (),
8603 (__mmask8) __U);
8604}
8605
8606extern __inline __m256
8607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8609{
8610 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8611 (__v8sf) __B,
8612 (__v8sf)
8613 _mm256_setzero_ps (),
8614 (__mmask8) __U);
8615}
8616
8617extern __inline __m256
8618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8619_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8620{
8621 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8622 (__v8sf) __W,
8623 (__mmask8) __U);
8624}
8625
8626extern __inline __m256
8627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8628_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8629{
8630 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8631 (__v8sf)
8632 _mm256_setzero_ps (),
8633 (__mmask8) __U);
8634}
8635
8636extern __inline __m128
8637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8639{
8640 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8641 (__v4sf) __B,
8642 (__v4sf) __W,
8643 (__mmask8) __U);
8644}
8645
8646extern __inline __m128
8647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8648_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8649{
8650 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8651 (__v4sf) __B,
8652 (__v4sf)
8653 _mm_setzero_ps (),
8654 (__mmask8) __U);
8655}
8656
8657extern __inline __m256i
8658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8660 __m128i __B)
8661{
8662 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8663 (__v4si) __B,
8664 (__v8si) __W,
8665 (__mmask8) __U);
8666}
8667
8668extern __inline __m256i
8669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8671{
8672 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8673 (__v4si) __B,
8674 (__v8si)
8675 _mm256_setzero_si256 (),
8676 (__mmask8) __U);
8677}
8678
8679extern __inline __m128i
8680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8682 __m128i __B)
8683{
8684 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8685 (__v4si) __B,
8686 (__v4si) __W,
8687 (__mmask8) __U);
8688}
8689
8690extern __inline __m128i
8691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8692_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8693{
8694 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8695 (__v4si) __B,
8696 (__v4si)
8697 _mm_setzero_si128 (),
8698 (__mmask8) __U);
8699}
8700
8701extern __inline __m256i
8702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703_mm256_sra_epi64 (__m256i __A, __m128i __B)
8704{
8705 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8706 (__v2di) __B,
8707 (__v4di)
8708 _mm256_setzero_si256 (),
8709 (__mmask8) -1);
8710}
8711
8712extern __inline __m256i
8713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8714_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8715 __m128i __B)
8716{
8717 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8718 (__v2di) __B,
8719 (__v4di) __W,
8720 (__mmask8) __U);
8721}
8722
8723extern __inline __m256i
8724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8725_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8726{
8727 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8728 (__v2di) __B,
8729 (__v4di)
8730 _mm256_setzero_si256 (),
8731 (__mmask8) __U);
8732}
8733
8734extern __inline __m128i
8735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736_mm_sra_epi64 (__m128i __A, __m128i __B)
8737{
8738 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8739 (__v2di) __B,
8740 (__v2di)
8741 _mm_setzero_di (),
8742 (__mmask8) -1);
8743}
8744
8745extern __inline __m128i
8746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8747_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8748 __m128i __B)
8749{
8750 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8751 (__v2di) __B,
8752 (__v2di) __W,
8753 (__mmask8) __U);
8754}
8755
8756extern __inline __m128i
8757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8759{
8760 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8761 (__v2di) __B,
8762 (__v2di)
8763 _mm_setzero_di (),
8764 (__mmask8) __U);
8765}
8766
8767extern __inline __m128i
8768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8769_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8770 __m128i __B)
8771{
8772 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8773 (__v4si) __B,
8774 (__v4si) __W,
8775 (__mmask8) __U);
8776}
8777
8778extern __inline __m128i
8779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8781{
8782 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8783 (__v4si) __B,
8784 (__v4si)
8785 _mm_setzero_si128 (),
8786 (__mmask8) __U);
8787}
8788
8789extern __inline __m128i
8790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8791_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8792 __m128i __B)
8793{
8794 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8795 (__v2di) __B,
8796 (__v2di) __W,
8797 (__mmask8) __U);
8798}
8799
8800extern __inline __m128i
8801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8803{
8804 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8805 (__v2di) __B,
8806 (__v2di)
8807 _mm_setzero_di (),
8808 (__mmask8) __U);
8809}
8810
8811extern __inline __m256i
8812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8813_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8814 __m128i __B)
8815{
8816 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8817 (__v4si) __B,
8818 (__v8si) __W,
8819 (__mmask8) __U);
8820}
8821
8822extern __inline __m256i
8823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8824_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8825{
8826 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8827 (__v4si) __B,
8828 (__v8si)
8829 _mm256_setzero_si256 (),
8830 (__mmask8) __U);
8831}
8832
8833extern __inline __m256i
8834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8835_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8836 __m128i __B)
8837{
8838 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8839 (__v2di) __B,
8840 (__v4di) __W,
8841 (__mmask8) __U);
8842}
8843
8844extern __inline __m256i
8845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8847{
8848 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8849 (__v2di) __B,
8850 (__v4di)
8851 _mm256_setzero_si256 (),
8852 (__mmask8) __U);
8853}
8854
8855extern __inline __m256
8856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8857_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8858 __m256 __Y)
8859{
8860 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8861 (__v8si) __X,
8862 (__v8sf) __W,
8863 (__mmask8) __U);
8864}
8865
8866extern __inline __m256
8867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8868_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8869{
8870 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8871 (__v8si) __X,
8872 (__v8sf)
8873 _mm256_setzero_ps (),
8874 (__mmask8) __U);
8875}
8876
8877extern __inline __m256d
8878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8879_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8880{
8881 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8882 (__v4di) __X,
8883 (__v4df)
8884 _mm256_setzero_pd (),
8885 (__mmask8) -1);
8886}
8887
8888extern __inline __m256d
8889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8890_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8891 __m256d __Y)
8892{
8893 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8894 (__v4di) __X,
8895 (__v4df) __W,
8896 (__mmask8) __U);
8897}
8898
8899extern __inline __m256d
8900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8902{
8903 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8904 (__v4di) __X,
8905 (__v4df)
8906 _mm256_setzero_pd (),
8907 (__mmask8) __U);
8908}
8909
8910extern __inline __m256d
8911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8912_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8913 __m256i __C)
8914{
8915 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8916 (__v4di) __C,
8917 (__v4df) __W,
8918 (__mmask8)
8919 __U);
8920}
8921
8922extern __inline __m256d
8923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8924_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8925{
8926 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8927 (__v4di) __C,
8928 (__v4df)
8929 _mm256_setzero_pd (),
8930 (__mmask8)
8931 __U);
8932}
8933
8934extern __inline __m256
8935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8936_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8937 __m256i __C)
8938{
8939 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8940 (__v8si) __C,
8941 (__v8sf) __W,
8942 (__mmask8) __U);
8943}
8944
8945extern __inline __m256
8946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8947_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
8948{
8949 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8950 (__v8si) __C,
8951 (__v8sf)
8952 _mm256_setzero_ps (),
8953 (__mmask8) __U);
8954}
8955
8956extern __inline __m128d
8957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8958_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
8959 __m128i __C)
8960{
8961 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8962 (__v2di) __C,
8963 (__v2df) __W,
8964 (__mmask8) __U);
8965}
8966
8967extern __inline __m128d
8968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8969_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
8970{
8971 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8972 (__v2di) __C,
8973 (__v2df)
8974 _mm_setzero_pd (),
8975 (__mmask8) __U);
8976}
8977
8978extern __inline __m128
8979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8980_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
8981 __m128i __C)
8982{
8983 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8984 (__v4si) __C,
8985 (__v4sf) __W,
8986 (__mmask8) __U);
8987}
8988
8989extern __inline __m128
8990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8991_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
8992{
8993 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8994 (__v4si) __C,
8995 (__v4sf)
8996 _mm_setzero_ps (),
8997 (__mmask8) __U);
8998}
8999
9000extern __inline __m256i
9001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9002_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9003{
9004 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9005 (__v8si) __B,
9006 (__v8si)
9007 _mm256_setzero_si256 (),
9008 __M);
9009}
9010
9011extern __inline __m256i
9012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9013_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9014{
9015 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9016 (__v4di) __X,
9017 (__v4di)
9018 _mm256_setzero_si256 (),
9019 __M);
9020}
9021
9022extern __inline __m256i
9023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9024_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9025 __m256i __B)
9026{
9027 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9028 (__v8si) __B,
9029 (__v8si) __W, __M);
9030}
9031
9032extern __inline __m128i
9033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9034_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9035{
9036 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9037 (__v4si) __B,
9038 (__v4si)
9039 _mm_setzero_si128 (),
9040 __M);
9041}
9042
9043extern __inline __m128i
9044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9045_mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
9046 __m128i __B)
9047{
9048 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9049 (__v4si) __B,
9050 (__v4si) __W, __M);
9051}
9052
9053extern __inline __m256i
9054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9056 __m256i __Y)
9057{
9058 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9059 (__v8si) __Y,
9060 (__v4di) __W, __M);
9061}
9062
9063extern __inline __m256i
9064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9065_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9066{
9067 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9068 (__v8si) __Y,
9069 (__v4di)
9070 _mm256_setzero_si256 (),
9071 __M);
9072}
9073
9074extern __inline __m128i
9075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9076_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9077 __m128i __Y)
9078{
9079 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9080 (__v4si) __Y,
9081 (__v2di) __W, __M);
9082}
9083
9084extern __inline __m128i
9085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9086_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9087{
9088 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9089 (__v4si) __Y,
9090 (__v2di)
9091 _mm_setzero_si128 (),
9092 __M);
9093}
9094
9095extern __inline __m256i
9096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9097_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9098 __m256i __Y)
9099{
9100 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9101 (__v4di) __X,
9102 (__v4di) __W,
9103 __M);
9104}
9105
9106extern __inline __m256i
9107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9109 __m256i __Y)
9110{
9111 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9112 (__v8si) __Y,
9113 (__v4di) __W, __M);
9114}
9115
9116extern __inline __m256i
9117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9119{
9120 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9121 (__v8si) __X,
9122 (__v8si)
9123 _mm256_setzero_si256 (),
9124 __M);
9125}
9126
9127extern __inline __m256i
9128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9130{
9131 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9132 (__v8si) __Y,
9133 (__v4di)
9134 _mm256_setzero_si256 (),
9135 __M);
9136}
9137
9138extern __inline __m128i
9139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9140_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9141 __m128i __Y)
9142{
9143 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9144 (__v4si) __Y,
9145 (__v2di) __W, __M);
9146}
9147
9148extern __inline __m128i
9149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9150_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9151{
9152 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9153 (__v4si) __Y,
9154 (__v2di)
9155 _mm_setzero_si128 (),
9156 __M);
9157}
9158
9159extern __inline __m256i
9160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9162 __m256i __Y)
9163{
9164 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9165 (__v8si) __X,
9166 (__v8si) __W,
9167 __M);
9168}
9169
9170#ifdef __OPTIMIZE__
9171extern __inline __m256i
9172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9173_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9174 __m256i __X, const int __I)
9175{
9176 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9177 __I,
9178 (__v4di) __W,
9179 (__mmask8) __M);
9180}
9181
9182extern __inline __m256i
9183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9184_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9185{
9186 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9187 __I,
9188 (__v4di)
9189 _mm256_setzero_si256 (),
9190 (__mmask8) __M);
9191}
9192
9193extern __inline __m256d
9194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9195_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9196 __m256d __B, const int __imm)
9197{
9198 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9199 (__v4df) __B, __imm,
9200 (__v4df) __W,
9201 (__mmask8) __U);
9202}
9203
9204extern __inline __m256d
9205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9206_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9207 const int __imm)
9208{
9209 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9210 (__v4df) __B, __imm,
9211 (__v4df)
9212 _mm256_setzero_pd (),
9213 (__mmask8) __U);
9214}
9215
9216extern __inline __m128d
9217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9218_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9219 __m128d __B, const int __imm)
9220{
9221 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9222 (__v2df) __B, __imm,
9223 (__v2df) __W,
9224 (__mmask8) __U);
9225}
9226
9227extern __inline __m128d
9228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9229_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9230 const int __imm)
9231{
9232 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9233 (__v2df) __B, __imm,
9234 (__v2df)
9235 _mm_setzero_pd (),
9236 (__mmask8) __U);
9237}
9238
9239extern __inline __m256
9240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9241_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9242 __m256 __B, const int __imm)
9243{
9244 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9245 (__v8sf) __B, __imm,
9246 (__v8sf) __W,
9247 (__mmask8) __U);
9248}
9249
9250extern __inline __m256
9251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9252_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9253 const int __imm)
9254{
9255 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9256 (__v8sf) __B, __imm,
9257 (__v8sf)
9258 _mm256_setzero_ps (),
9259 (__mmask8) __U);
9260}
9261
9262extern __inline __m128
9263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9264_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9265 const int __imm)
9266{
9267 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9268 (__v4sf) __B, __imm,
9269 (__v4sf) __W,
9270 (__mmask8) __U);
9271}
9272
9273extern __inline __m128
9274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9275_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9276 const int __imm)
9277{
9278 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9279 (__v4sf) __B, __imm,
9280 (__v4sf)
9281 _mm_setzero_ps (),
9282 (__mmask8) __U);
9283}
9284
9285extern __inline __m256i
9286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9287_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9288{
9289 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9290 (__v4si) __B,
9291 __imm,
9292 (__v8si)
9293 _mm256_setzero_si256 (),
9294 (__mmask8) -
9295 1);
9296}
9297
9298extern __inline __m256i
9299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9300_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9301 __m128i __B, const int __imm)
9302{
9303 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9304 (__v4si) __B,
9305 __imm,
9306 (__v8si) __W,
9307 (__mmask8)
9308 __U);
9309}
9310
9311extern __inline __m256i
9312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9313_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9314 const int __imm)
9315{
9316 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9317 (__v4si) __B,
9318 __imm,
9319 (__v8si)
9320 _mm256_setzero_si256 (),
9321 (__mmask8)
9322 __U);
9323}
9324
9325extern __inline __m256
9326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9327_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
9328{
9329 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9330 (__v4sf) __B,
9331 __imm,
9332 (__v8sf)
9333 _mm256_setzero_ps (),
9334 (__mmask8) -1);
9335}
9336
9337extern __inline __m256
9338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9339_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9340 __m128 __B, const int __imm)
9341{
9342 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9343 (__v4sf) __B,
9344 __imm,
9345 (__v8sf) __W,
9346 (__mmask8) __U);
9347}
9348
9349extern __inline __m256
9350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9351_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
9352 const int __imm)
9353{
9354 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9355 (__v4sf) __B,
9356 __imm,
9357 (__v8sf)
9358 _mm256_setzero_ps (),
9359 (__mmask8) __U);
9360}
9361
9362extern __inline __m128i
9363__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9364_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
9365{
9366 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9367 __imm,
9368 (__v4si)
9369 _mm_setzero_si128 (),
9370 (__mmask8) -
9371 1);
9372}
9373
9374extern __inline __m128i
9375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
9377 const int __imm)
9378{
9379 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9380 __imm,
9381 (__v4si) __W,
9382 (__mmask8)
9383 __U);
9384}
9385
9386extern __inline __m128i
9387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9388_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
9389 const int __imm)
9390{
9391 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9392 __imm,
9393 (__v4si)
9394 _mm_setzero_si128 (),
9395 (__mmask8)
9396 __U);
9397}
9398
9399extern __inline __m128
9400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9401_mm256_extractf32x4_ps (__m256 __A, const int __imm)
9402{
9403 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9404 __imm,
9405 (__v4sf)
9406 _mm_setzero_ps (),
9407 (__mmask8) -
9408 1);
9409}
9410
9411extern __inline __m128
9412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
9414 const int __imm)
9415{
9416 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9417 __imm,
9418 (__v4sf) __W,
9419 (__mmask8)
9420 __U);
9421}
9422
9423extern __inline __m128
9424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9425_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
9426 const int __imm)
9427{
9428 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9429 __imm,
9430 (__v4sf)
9431 _mm_setzero_ps (),
9432 (__mmask8)
9433 __U);
9434}
9435
9436extern __inline __m256i
9437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9438_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
9439{
9440 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9441 (__v4di) __B,
9442 __imm,
9443 (__v4di)
9444 _mm256_setzero_si256 (),
9445 (__mmask8) -1);
9446}
9447
9448extern __inline __m256i
9449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9450_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
9451 __m256i __B, const int __imm)
9452{
9453 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9454 (__v4di) __B,
9455 __imm,
9456 (__v4di) __W,
9457 (__mmask8) __U);
9458}
9459
9460extern __inline __m256i
9461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9462_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
9463 const int __imm)
9464{
9465 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9466 (__v4di) __B,
9467 __imm,
9468 (__v4di)
9469 _mm256_setzero_si256 (),
9470 (__mmask8) __U);
9471}
9472
9473extern __inline __m256i
9474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
9476{
9477 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9478 (__v8si) __B,
9479 __imm,
9480 (__v8si)
9481 _mm256_setzero_si256 (),
9482 (__mmask8) -1);
9483}
9484
9485extern __inline __m256i
9486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9487_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9488 __m256i __B, const int __imm)
9489{
9490 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9491 (__v8si) __B,
9492 __imm,
9493 (__v8si) __W,
9494 (__mmask8) __U);
9495}
9496
9497extern __inline __m256i
9498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9499_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
9500 const int __imm)
9501{
9502 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9503 (__v8si) __B,
9504 __imm,
9505 (__v8si)
9506 _mm256_setzero_si256 (),
9507 (__mmask8) __U);
9508}
9509
9510extern __inline __m256d
9511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9512_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
9513{
9514 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9515 (__v4df) __B,
9516 __imm,
9517 (__v4df)
9518 _mm256_setzero_pd (),
9519 (__mmask8) -1);
9520}
9521
9522extern __inline __m256d
9523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9524_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
9525 __m256d __B, const int __imm)
9526{
9527 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9528 (__v4df) __B,
9529 __imm,
9530 (__v4df) __W,
9531 (__mmask8) __U);
9532}
9533
9534extern __inline __m256d
9535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9536_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
9537 const int __imm)
9538{
9539 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9540 (__v4df) __B,
9541 __imm,
9542 (__v4df)
9543 _mm256_setzero_pd (),
9544 (__mmask8) __U);
9545}
9546
9547extern __inline __m256
9548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9549_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
9550{
9551 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9552 (__v8sf) __B,
9553 __imm,
9554 (__v8sf)
9555 _mm256_setzero_ps (),
9556 (__mmask8) -1);
9557}
9558
9559extern __inline __m256
9560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9561_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9562 __m256 __B, const int __imm)
9563{
9564 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9565 (__v8sf) __B,
9566 __imm,
9567 (__v8sf) __W,
9568 (__mmask8) __U);
9569}
9570
9571extern __inline __m256
9572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9573_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
9574 const int __imm)
9575{
9576 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9577 (__v8sf) __B,
9578 __imm,
9579 (__v8sf)
9580 _mm256_setzero_ps (),
9581 (__mmask8) __U);
9582}
9583
9584extern __inline __m256d
9585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9586_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
9587 const int __imm)
9588{
9589 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9590 (__v4df) __B,
9591 (__v4di) __C,
9592 __imm,
9593 (__mmask8) -1);
9594}
9595
9596extern __inline __m256d
9597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
9599 __m256i __C, const int __imm)
9600{
9601 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9602 (__v4df) __B,
9603 (__v4di) __C,
9604 __imm,
9605 (__mmask8) __U);
9606}
9607
9608extern __inline __m256d
9609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9610_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
9611 __m256i __C, const int __imm)
9612{
9613 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
9614 (__v4df) __B,
9615 (__v4di) __C,
9616 __imm,
9617 (__mmask8) __U);
9618}
9619
9620extern __inline __m256
9621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9622_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
9623 const int __imm)
9624{
9625 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9626 (__v8sf) __B,
9627 (__v8si) __C,
9628 __imm,
9629 (__mmask8) -1);
9630}
9631
9632extern __inline __m256
9633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9634_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
9635 __m256i __C, const int __imm)
9636{
9637 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9638 (__v8sf) __B,
9639 (__v8si) __C,
9640 __imm,
9641 (__mmask8) __U);
9642}
9643
9644extern __inline __m256
9645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9646_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
9647 __m256i __C, const int __imm)
9648{
9649 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
9650 (__v8sf) __B,
9651 (__v8si) __C,
9652 __imm,
9653 (__mmask8) __U);
9654}
9655
9656extern __inline __m128d
9657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9658_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
9659 const int __imm)
9660{
9661 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9662 (__v2df) __B,
9663 (__v2di) __C,
9664 __imm,
9665 (__mmask8) -1);
9666}
9667
9668extern __inline __m128d
9669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9670_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
9671 __m128i __C, const int __imm)
9672{
9673 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9674 (__v2df) __B,
9675 (__v2di) __C,
9676 __imm,
9677 (__mmask8) __U);
9678}
9679
9680extern __inline __m128d
9681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9682_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
9683 __m128i __C, const int __imm)
9684{
9685 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
9686 (__v2df) __B,
9687 (__v2di) __C,
9688 __imm,
9689 (__mmask8) __U);
9690}
9691
9692extern __inline __m128
9693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9694_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
9695{
9696 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9697 (__v4sf) __B,
9698 (__v4si) __C,
9699 __imm,
9700 (__mmask8) -1);
9701}
9702
9703extern __inline __m128
9704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9705_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
9706 __m128i __C, const int __imm)
9707{
9708 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9709 (__v4sf) __B,
9710 (__v4si) __C,
9711 __imm,
9712 (__mmask8) __U);
9713}
9714
9715extern __inline __m128
9716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9717_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
9718 __m128i __C, const int __imm)
9719{
9720 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
9721 (__v4sf) __B,
9722 (__v4si) __C,
9723 __imm,
9724 (__mmask8) __U);
9725}
9726
9727extern __inline __m256i
9728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9729_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
9730 const int __imm)
9731{
9732 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9733 (__v8si) __W,
9734 (__mmask8) __U);
9735}
9736
9737extern __inline __m256i
9738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9739_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
9740{
9741 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9742 (__v8si)
9743 _mm256_setzero_si256 (),
9744 (__mmask8) __U);
9745}
9746
9747extern __inline __m128i
9748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9749_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
9750 const int __imm)
9751{
9752 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9753 (__v4si) __W,
9754 (__mmask8) __U);
9755}
9756
9757extern __inline __m128i
9758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9759_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
9760{
9761 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9762 (__v4si)
9763 _mm_setzero_si128 (),
9764 (__mmask8) __U);
9765}
9766
9767extern __inline __m256i
9768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
9770 const int __imm)
9771{
9772 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9773 (__v4di) __W,
9774 (__mmask8) __U);
9775}
9776
9777extern __inline __m256i
9778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9779_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
9780{
9781 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9782 (__v4di)
9783 _mm256_setzero_si256 (),
9784 (__mmask8) __U);
9785}
9786
9787extern __inline __m128i
9788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9789_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
9790 const int __imm)
9791{
9792 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9793 (__v2di) __W,
9794 (__mmask8) __U);
9795}
9796
9797extern __inline __m128i
9798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9799_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
9800{
9801 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9802 (__v2di)
9803 _mm_setzero_si128 (),
9804 (__mmask8) __U);
9805}
9806
9807extern __inline __m256i
9808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9809_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
9810 const int imm)
9811{
9812 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9813 (__v4di) __B,
9814 (__v4di) __C, imm,
9815 (__mmask8) -1);
9816}
9817
9818extern __inline __m256i
9819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9820_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
9821 __m256i __B, __m256i __C,
9822 const int imm)
9823{
9824 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9825 (__v4di) __B,
9826 (__v4di) __C, imm,
9827 (__mmask8) __U);
9828}
9829
9830extern __inline __m256i
9831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9832_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
9833 __m256i __B, __m256i __C,
9834 const int imm)
9835{
9836 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
9837 (__v4di) __B,
9838 (__v4di) __C,
9839 imm,
9840 (__mmask8) __U);
9841}
9842
9843extern __inline __m256i
9844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9845_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
9846 const int imm)
9847{
9848 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9849 (__v8si) __B,
9850 (__v8si) __C, imm,
9851 (__mmask8) -1);
9852}
9853
9854extern __inline __m256i
9855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9856_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
9857 __m256i __B, __m256i __C,
9858 const int imm)
9859{
9860 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9861 (__v8si) __B,
9862 (__v8si) __C, imm,
9863 (__mmask8) __U);
9864}
9865
9866extern __inline __m256i
9867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9868_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
9869 __m256i __B, __m256i __C,
9870 const int imm)
9871{
9872 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
9873 (__v8si) __B,
9874 (__v8si) __C,
9875 imm,
9876 (__mmask8) __U);
9877}
9878
9879extern __inline __m128i
9880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9881_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
9882 const int imm)
9883{
9884 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9885 (__v2di) __B,
9886 (__v2di) __C, imm,
9887 (__mmask8) -1);
9888}
9889
9890extern __inline __m128i
9891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9892_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
9893 __m128i __B, __m128i __C, const int imm)
9894{
9895 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9896 (__v2di) __B,
9897 (__v2di) __C, imm,
9898 (__mmask8) __U);
9899}
9900
9901extern __inline __m128i
9902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9903_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
9904 __m128i __B, __m128i __C, const int imm)
9905{
9906 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
9907 (__v2di) __B,
9908 (__v2di) __C,
9909 imm,
9910 (__mmask8) __U);
9911}
9912
9913extern __inline __m128i
9914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9915_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
9916 const int imm)
9917{
9918 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9919 (__v4si) __B,
9920 (__v4si) __C, imm,
9921 (__mmask8) -1);
9922}
9923
9924extern __inline __m128i
9925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
9927 __m128i __B, __m128i __C, const int imm)
9928{
9929 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9930 (__v4si) __B,
9931 (__v4si) __C, imm,
9932 (__mmask8) __U);
9933}
9934
9935extern __inline __m128i
9936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
9938 __m128i __B, __m128i __C, const int imm)
9939{
9940 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
9941 (__v4si) __B,
9942 (__v4si) __C,
9943 imm,
9944 (__mmask8) __U);
9945}
9946
9947extern __inline __m256
9948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9949_mm256_roundscale_ps (__m256 __A, const int __imm)
9950{
9951 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9952 __imm,
9953 (__v8sf)
9954 _mm256_setzero_ps (),
9955 (__mmask8) -1);
9956}
9957
9958extern __inline __m256
9959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
9961 const int __imm)
9962{
9963 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9964 __imm,
9965 (__v8sf) __W,
9966 (__mmask8) __U);
9967}
9968
9969extern __inline __m256
9970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9971_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
9972{
9973 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9974 __imm,
9975 (__v8sf)
9976 _mm256_setzero_ps (),
9977 (__mmask8) __U);
9978}
9979
9980extern __inline __m256d
9981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9982_mm256_roundscale_pd (__m256d __A, const int __imm)
9983{
9984 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9985 __imm,
9986 (__v4df)
9987 _mm256_setzero_pd (),
9988 (__mmask8) -1);
9989}
9990
9991extern __inline __m256d
9992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
9994 const int __imm)
9995{
9996 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9997 __imm,
9998 (__v4df) __W,
9999 (__mmask8) __U);
10000}
10001
10002extern __inline __m256d
10003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10004_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
10005{
10006 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10007 __imm,
10008 (__v4df)
10009 _mm256_setzero_pd (),
10010 (__mmask8) __U);
10011}
10012
10013extern __inline __m128
10014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10015_mm_roundscale_ps (__m128 __A, const int __imm)
10016{
10017 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10018 __imm,
10019 (__v4sf)
10020 _mm_setzero_ps (),
10021 (__mmask8) -1);
10022}
10023
10024extern __inline __m128
10025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10026_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10027 const int __imm)
10028{
10029 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10030 __imm,
10031 (__v4sf) __W,
10032 (__mmask8) __U);
10033}
10034
10035extern __inline __m128
10036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10038{
10039 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10040 __imm,
10041 (__v4sf)
10042 _mm_setzero_ps (),
10043 (__mmask8) __U);
10044}
10045
10046extern __inline __m128d
10047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048_mm_roundscale_pd (__m128d __A, const int __imm)
10049{
10050 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10051 __imm,
10052 (__v2df)
10053 _mm_setzero_pd (),
10054 (__mmask8) -1);
10055}
10056
10057extern __inline __m128d
10058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10059_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10060 const int __imm)
10061{
10062 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10063 __imm,
10064 (__v2df) __W,
10065 (__mmask8) __U);
10066}
10067
10068extern __inline __m128d
10069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10070_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10071{
10072 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10073 __imm,
10074 (__v2df)
10075 _mm_setzero_pd (),
10076 (__mmask8) __U);
10077}
10078
10079extern __inline __m256
10080__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10082 _MM_MANTISSA_SIGN_ENUM __C)
10083{
10084 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10085 (__C << 2) | __B,
10086 (__v8sf)
10087 _mm256_setzero_ps (),
10088 (__mmask8) -1);
10089}
10090
10091extern __inline __m256
10092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10093_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10094 _MM_MANTISSA_NORM_ENUM __B,
10095 _MM_MANTISSA_SIGN_ENUM __C)
10096{
10097 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10098 (__C << 2) | __B,
10099 (__v8sf) __W,
10100 (__mmask8) __U);
10101}
10102
10103extern __inline __m256
10104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10106 _MM_MANTISSA_NORM_ENUM __B,
10107 _MM_MANTISSA_SIGN_ENUM __C)
10108{
10109 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10110 (__C << 2) | __B,
10111 (__v8sf)
10112 _mm256_setzero_ps (),
10113 (__mmask8) __U);
10114}
10115
10116extern __inline __m128
10117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10118_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10119 _MM_MANTISSA_SIGN_ENUM __C)
10120{
10121 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10122 (__C << 2) | __B,
10123 (__v4sf)
10124 _mm_setzero_ps (),
10125 (__mmask8) -1);
10126}
10127
10128extern __inline __m128
10129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10130_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10131 _MM_MANTISSA_NORM_ENUM __B,
10132 _MM_MANTISSA_SIGN_ENUM __C)
10133{
10134 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10135 (__C << 2) | __B,
10136 (__v4sf) __W,
10137 (__mmask8) __U);
10138}
10139
10140extern __inline __m128
10141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10142_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10143 _MM_MANTISSA_NORM_ENUM __B,
10144 _MM_MANTISSA_SIGN_ENUM __C)
10145{
10146 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10147 (__C << 2) | __B,
10148 (__v4sf)
10149 _mm_setzero_ps (),
10150 (__mmask8) __U);
10151}
10152
10153extern __inline __m256d
10154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10155_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10156 _MM_MANTISSA_SIGN_ENUM __C)
10157{
10158 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10159 (__C << 2) | __B,
10160 (__v4df)
10161 _mm256_setzero_pd (),
10162 (__mmask8) -1);
10163}
10164
10165extern __inline __m256d
10166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10167_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10168 _MM_MANTISSA_NORM_ENUM __B,
10169 _MM_MANTISSA_SIGN_ENUM __C)
10170{
10171 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10172 (__C << 2) | __B,
10173 (__v4df) __W,
10174 (__mmask8) __U);
10175}
10176
10177extern __inline __m256d
10178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10179_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10180 _MM_MANTISSA_NORM_ENUM __B,
10181 _MM_MANTISSA_SIGN_ENUM __C)
10182{
10183 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10184 (__C << 2) | __B,
10185 (__v4df)
10186 _mm256_setzero_pd (),
10187 (__mmask8) __U);
10188}
10189
10190extern __inline __m128d
10191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10192_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10193 _MM_MANTISSA_SIGN_ENUM __C)
10194{
10195 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10196 (__C << 2) | __B,
10197 (__v2df)
10198 _mm_setzero_pd (),
10199 (__mmask8) -1);
10200}
10201
10202extern __inline __m128d
10203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10204_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10205 _MM_MANTISSA_NORM_ENUM __B,
10206 _MM_MANTISSA_SIGN_ENUM __C)
10207{
10208 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10209 (__C << 2) | __B,
10210 (__v2df) __W,
10211 (__mmask8) __U);
10212}
10213
10214extern __inline __m128d
10215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10216_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10217 _MM_MANTISSA_NORM_ENUM __B,
10218 _MM_MANTISSA_SIGN_ENUM __C)
10219{
10220 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10221 (__C << 2) | __B,
10222 (__v2df)
10223 _mm_setzero_pd (),
10224 (__mmask8) __U);
10225}
10226
10227extern __inline __m256
10228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10229_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10230 __m256i __index, float const *__addr,
10231 int __scale)
10232{
10233 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10234 __addr,
10235 (__v8si) __index,
10236 __mask, __scale);
10237}
10238
10239extern __inline __m128
10240__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10241_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10242 __m128i __index, float const *__addr,
10243 int __scale)
10244{
10245 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10246 __addr,
10247 (__v4si) __index,
10248 __mask, __scale);
10249}
10250
10251extern __inline __m256d
10252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10253_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10254 __m128i __index, double const *__addr,
10255 int __scale)
10256{
10257 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10258 __addr,
10259 (__v4si) __index,
10260 __mask, __scale);
10261}
10262
10263extern __inline __m128d
10264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10265_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10266 __m128i __index, double const *__addr,
10267 int __scale)
10268{
10269 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10270 __addr,
10271 (__v4si) __index,
10272 __mask, __scale);
10273}
10274
10275extern __inline __m128
10276__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10277_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10278 __m256i __index, float const *__addr,
10279 int __scale)
10280{
10281 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10282 __addr,
10283 (__v4di) __index,
10284 __mask, __scale);
10285}
10286
10287extern __inline __m128
10288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10289_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10290 __m128i __index, float const *__addr,
10291 int __scale)
10292{
10293 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10294 __addr,
10295 (__v2di) __index,
10296 __mask, __scale);
10297}
10298
10299extern __inline __m256d
10300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10301_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10302 __m256i __index, double const *__addr,
10303 int __scale)
10304{
10305 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10306 __addr,
10307 (__v4di) __index,
10308 __mask, __scale);
10309}
10310
10311extern __inline __m128d
10312__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10313_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10314 __m128i __index, double const *__addr,
10315 int __scale)
10316{
10317 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10318 __addr,
10319 (__v2di) __index,
10320 __mask, __scale);
10321}
10322
10323extern __inline __m256i
10324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10325_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10326 __m256i __index, int const *__addr,
10327 int __scale)
10328{
10329 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10330 __addr,
10331 (__v8si) __index,
10332 __mask, __scale);
10333}
10334
10335extern __inline __m128i
10336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10337_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10338 __m128i __index, int const *__addr,
10339 int __scale)
10340{
10341 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
10342 __addr,
10343 (__v4si) __index,
10344 __mask, __scale);
10345}
10346
10347extern __inline __m256i
10348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10349_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10350 __m128i __index, long long const *__addr,
10351 int __scale)
10352{
10353 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
10354 __addr,
10355 (__v4si) __index,
10356 __mask, __scale);
10357}
10358
10359extern __inline __m128i
10360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10361_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10362 __m128i __index, long long const *__addr,
10363 int __scale)
10364{
10365 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
10366 __addr,
10367 (__v4si) __index,
10368 __mask, __scale);
10369}
10370
10371extern __inline __m128i
10372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10373_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10374 __m256i __index, int const *__addr,
10375 int __scale)
10376{
10377 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
10378 __addr,
10379 (__v4di) __index,
10380 __mask, __scale);
10381}
10382
10383extern __inline __m128i
10384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10385_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10386 __m128i __index, int const *__addr,
10387 int __scale)
10388{
10389 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
10390 __addr,
10391 (__v2di) __index,
10392 __mask, __scale);
10393}
10394
10395extern __inline __m256i
10396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10397_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10398 __m256i __index, long long const *__addr,
10399 int __scale)
10400{
10401 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
10402 __addr,
10403 (__v4di) __index,
10404 __mask, __scale);
10405}
10406
10407extern __inline __m128i
10408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10409_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10410 __m128i __index, long long const *__addr,
10411 int __scale)
10412{
10413 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
10414 __addr,
10415 (__v2di) __index,
10416 __mask, __scale);
10417}
10418
10419extern __inline void
10420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421_mm256_i32scatter_ps (float *__addr, __m256i __index,
10422 __m256 __v1, const int __scale)
10423{
10424 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
10425 (__v8si) __index, (__v8sf) __v1,
10426 __scale);
10427}
10428
10429extern __inline void
10430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10431_mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10432 __m256i __index, __m256 __v1,
10433 const int __scale)
10434{
10435 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
10436 (__v8sf) __v1, __scale);
10437}
10438
10439extern __inline void
10440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10441_mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10442 const int __scale)
10443{
10444 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
10445 (__v4si) __index, (__v4sf) __v1,
10446 __scale);
10447}
10448
10449extern __inline void
10450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10451_mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10452 __m128i __index, __m128 __v1,
10453 const int __scale)
10454{
10455 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
10456 (__v4sf) __v1, __scale);
10457}
10458
10459extern __inline void
10460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10461_mm256_i32scatter_pd (double *__addr, __m128i __index,
10462 __m256d __v1, const int __scale)
10463{
10464 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
10465 (__v4si) __index, (__v4df) __v1,
10466 __scale);
10467}
10468
10469extern __inline void
10470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471_mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10472 __m128i __index, __m256d __v1,
10473 const int __scale)
10474{
10475 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
10476 (__v4df) __v1, __scale);
10477}
10478
10479extern __inline void
10480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10481_mm_i32scatter_pd (double *__addr, __m128i __index,
10482 __m128d __v1, const int __scale)
10483{
10484 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
10485 (__v4si) __index, (__v2df) __v1,
10486 __scale);
10487}
10488
10489extern __inline void
10490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10491_mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10492 __m128i __index, __m128d __v1,
10493 const int __scale)
10494{
10495 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
10496 (__v2df) __v1, __scale);
10497}
10498
10499extern __inline void
10500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10501_mm256_i64scatter_ps (float *__addr, __m256i __index,
10502 __m128 __v1, const int __scale)
10503{
10504 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
10505 (__v4di) __index, (__v4sf) __v1,
10506 __scale);
10507}
10508
10509extern __inline void
10510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10511_mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10512 __m256i __index, __m128 __v1,
10513 const int __scale)
10514{
10515 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
10516 (__v4sf) __v1, __scale);
10517}
10518
10519extern __inline void
10520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10521_mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10522 const int __scale)
10523{
10524 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
10525 (__v2di) __index, (__v4sf) __v1,
10526 __scale);
10527}
10528
10529extern __inline void
10530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10531_mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10532 __m128i __index, __m128 __v1,
10533 const int __scale)
10534{
10535 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
10536 (__v4sf) __v1, __scale);
10537}
10538
10539extern __inline void
10540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10541_mm256_i64scatter_pd (double *__addr, __m256i __index,
10542 __m256d __v1, const int __scale)
10543{
10544 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
10545 (__v4di) __index, (__v4df) __v1,
10546 __scale);
10547}
10548
10549extern __inline void
10550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10551_mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10552 __m256i __index, __m256d __v1,
10553 const int __scale)
10554{
10555 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
10556 (__v4df) __v1, __scale);
10557}
10558
10559extern __inline void
10560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10561_mm_i64scatter_pd (double *__addr, __m128i __index,
10562 __m128d __v1, const int __scale)
10563{
10564 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
10565 (__v2di) __index, (__v2df) __v1,
10566 __scale);
10567}
10568
10569extern __inline void
10570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10571_mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10572 __m128i __index, __m128d __v1,
10573 const int __scale)
10574{
10575 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
10576 (__v2df) __v1, __scale);
10577}
10578
10579extern __inline void
10580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10581_mm256_i32scatter_epi32 (int *__addr, __m256i __index,
10582 __m256i __v1, const int __scale)
10583{
10584 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
10585 (__v8si) __index, (__v8si) __v1,
10586 __scale);
10587}
10588
10589extern __inline void
10590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10591_mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10592 __m256i __index, __m256i __v1,
10593 const int __scale)
10594{
10595 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
10596 (__v8si) __v1, __scale);
10597}
10598
10599extern __inline void
10600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10601_mm_i32scatter_epi32 (int *__addr, __m128i __index,
10602 __m128i __v1, const int __scale)
10603{
10604 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
10605 (__v4si) __index, (__v4si) __v1,
10606 __scale);
10607}
10608
10609extern __inline void
10610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10611_mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10612 __m128i __index, __m128i __v1,
10613 const int __scale)
10614{
10615 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
10616 (__v4si) __v1, __scale);
10617}
10618
10619extern __inline void
10620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10621_mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
10622 __m256i __v1, const int __scale)
10623{
10624 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
10625 (__v4si) __index, (__v4di) __v1,
10626 __scale);
10627}
10628
10629extern __inline void
10630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10631_mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10632 __m128i __index, __m256i __v1,
10633 const int __scale)
10634{
10635 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
10636 (__v4di) __v1, __scale);
10637}
10638
10639extern __inline void
10640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10641_mm_i32scatter_epi64 (long long *__addr, __m128i __index,
10642 __m128i __v1, const int __scale)
10643{
10644 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
10645 (__v4si) __index, (__v2di) __v1,
10646 __scale);
10647}
10648
10649extern __inline void
10650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10651_mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10652 __m128i __index, __m128i __v1,
10653 const int __scale)
10654{
10655 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
10656 (__v2di) __v1, __scale);
10657}
10658
10659extern __inline void
10660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10661_mm256_i64scatter_epi32 (int *__addr, __m256i __index,
10662 __m128i __v1, const int __scale)
10663{
10664 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
10665 (__v4di) __index, (__v4si) __v1,
10666 __scale);
10667}
10668
10669extern __inline void
10670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10671_mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10672 __m256i __index, __m128i __v1,
10673 const int __scale)
10674{
10675 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
10676 (__v4si) __v1, __scale);
10677}
10678
10679extern __inline void
10680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10681_mm_i64scatter_epi32 (int *__addr, __m128i __index,
10682 __m128i __v1, const int __scale)
10683{
10684 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
10685 (__v2di) __index, (__v4si) __v1,
10686 __scale);
10687}
10688
10689extern __inline void
10690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10691_mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10692 __m128i __index, __m128i __v1,
10693 const int __scale)
10694{
10695 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
10696 (__v4si) __v1, __scale);
10697}
10698
10699extern __inline void
10700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10701_mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
10702 __m256i __v1, const int __scale)
10703{
10704 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
10705 (__v4di) __index, (__v4di) __v1,
10706 __scale);
10707}
10708
10709extern __inline void
10710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10711_mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10712 __m256i __index, __m256i __v1,
10713 const int __scale)
10714{
10715 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
10716 (__v4di) __v1, __scale);
10717}
10718
10719extern __inline void
10720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10721_mm_i64scatter_epi64 (long long *__addr, __m128i __index,
10722 __m128i __v1, const int __scale)
10723{
10724 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
10725 (__v2di) __index, (__v2di) __v1,
10726 __scale);
10727}
10728
10729extern __inline void
10730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10731_mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10732 __m128i __index, __m128i __v1,
10733 const int __scale)
10734{
10735 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
10736 (__v2di) __v1, __scale);
10737}
10738
10739extern __inline __m256i
10740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10741_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10742 _MM_PERM_ENUM __mask)
10743{
10744 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10745 (__v8si) __W,
10746 (__mmask8) __U);
10747}
10748
10749extern __inline __m256i
10750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10751_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
10752 _MM_PERM_ENUM __mask)
10753{
10754 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10755 (__v8si)
10756 _mm256_setzero_si256 (),
10757 (__mmask8) __U);
10758}
10759
10760extern __inline __m128i
10761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10762_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10763 _MM_PERM_ENUM __mask)
10764{
10765 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10766 (__v4si) __W,
10767 (__mmask8) __U);
10768}
10769
10770extern __inline __m128i
10771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10772_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
10773 _MM_PERM_ENUM __mask)
10774{
10775 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10776 (__v4si)
10777 _mm_setzero_si128 (),
10778 (__mmask8) __U);
10779}
10780
10781extern __inline __m256i
10782__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10783_mm256_rol_epi32 (__m256i __A, const int __B)
10784{
10785 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10786 (__v8si)
10787 _mm256_setzero_si256 (),
10788 (__mmask8) -1);
10789}
10790
10791extern __inline __m256i
10792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10793_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10794 const int __B)
10795{
10796 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10797 (__v8si) __W,
10798 (__mmask8) __U);
10799}
10800
10801extern __inline __m256i
10802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10803_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
10804{
10805 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10806 (__v8si)
10807 _mm256_setzero_si256 (),
10808 (__mmask8) __U);
10809}
10810
10811extern __inline __m128i
10812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10813_mm_rol_epi32 (__m128i __A, const int __B)
10814{
10815 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10816 (__v4si)
10817 _mm_setzero_si128 (),
10818 (__mmask8) -1);
10819}
10820
10821extern __inline __m128i
10822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10823_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10824 const int __B)
10825{
10826 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10827 (__v4si) __W,
10828 (__mmask8) __U);
10829}
10830
10831extern __inline __m128i
10832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10833_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
10834{
10835 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10836 (__v4si)
10837 _mm_setzero_si128 (),
10838 (__mmask8) __U);
10839}
10840
10841extern __inline __m256i
10842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10843_mm256_ror_epi32 (__m256i __A, const int __B)
10844{
10845 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10846 (__v8si)
10847 _mm256_setzero_si256 (),
10848 (__mmask8) -1);
10849}
10850
10851extern __inline __m256i
10852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10853_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10854 const int __B)
10855{
10856 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10857 (__v8si) __W,
10858 (__mmask8) __U);
10859}
10860
10861extern __inline __m256i
10862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10863_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
10864{
10865 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10866 (__v8si)
10867 _mm256_setzero_si256 (),
10868 (__mmask8) __U);
10869}
10870
10871extern __inline __m128i
10872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10873_mm_ror_epi32 (__m128i __A, const int __B)
10874{
10875 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10876 (__v4si)
10877 _mm_setzero_si128 (),
10878 (__mmask8) -1);
10879}
10880
10881extern __inline __m128i
10882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10883_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10884 const int __B)
10885{
10886 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10887 (__v4si) __W,
10888 (__mmask8) __U);
10889}
10890
10891extern __inline __m128i
10892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10893_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
10894{
10895 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10896 (__v4si)
10897 _mm_setzero_si128 (),
10898 (__mmask8) __U);
10899}
10900
10901extern __inline __m256i
10902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10903_mm256_rol_epi64 (__m256i __A, const int __B)
10904{
10905 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10906 (__v4di)
10907 _mm256_setzero_si256 (),
10908 (__mmask8) -1);
10909}
10910
10911extern __inline __m256i
10912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10913_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10914 const int __B)
10915{
10916 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10917 (__v4di) __W,
10918 (__mmask8) __U);
10919}
10920
10921extern __inline __m256i
10922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10923_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
10924{
10925 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10926 (__v4di)
10927 _mm256_setzero_si256 (),
10928 (__mmask8) __U);
10929}
10930
10931extern __inline __m128i
10932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10933_mm_rol_epi64 (__m128i __A, const int __B)
10934{
10935 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10936 (__v2di)
10937 _mm_setzero_di (),
10938 (__mmask8) -1);
10939}
10940
10941extern __inline __m128i
10942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10943_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10944 const int __B)
10945{
10946 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10947 (__v2di) __W,
10948 (__mmask8) __U);
10949}
10950
10951extern __inline __m128i
10952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
10954{
10955 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10956 (__v2di)
10957 _mm_setzero_di (),
10958 (__mmask8) __U);
10959}
10960
10961extern __inline __m256i
10962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10963_mm256_ror_epi64 (__m256i __A, const int __B)
10964{
10965 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10966 (__v4di)
10967 _mm256_setzero_si256 (),
10968 (__mmask8) -1);
10969}
10970
10971extern __inline __m256i
10972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10973_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10974 const int __B)
10975{
10976 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10977 (__v4di) __W,
10978 (__mmask8) __U);
10979}
10980
10981extern __inline __m256i
10982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10983_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
10984{
10985 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10986 (__v4di)
10987 _mm256_setzero_si256 (),
10988 (__mmask8) __U);
10989}
10990
10991extern __inline __m128i
10992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10993_mm_ror_epi64 (__m128i __A, const int __B)
10994{
10995 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
10996 (__v2di)
10997 _mm_setzero_di (),
10998 (__mmask8) -1);
10999}
11000
11001extern __inline __m128i
11002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11003_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11004 const int __B)
11005{
11006 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11007 (__v2di) __W,
11008 (__mmask8) __U);
11009}
11010
11011extern __inline __m128i
11012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11013_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11014{
11015 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11016 (__v2di)
11017 _mm_setzero_di (),
11018 (__mmask8) __U);
11019}
11020
11021extern __inline __m128i
11022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11024{
11025 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11026 (__v4si) __B, __imm,
11027 (__v4si)
11028 _mm_setzero_si128 (),
11029 (__mmask8) -1);
11030}
11031
11032extern __inline __m128i
11033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11034_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11035 __m128i __B, const int __imm)
11036{
11037 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11038 (__v4si) __B, __imm,
11039 (__v4si) __W,
11040 (__mmask8) __U);
11041}
11042
11043extern __inline __m128i
11044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11045_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11046 const int __imm)
11047{
11048 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11049 (__v4si) __B, __imm,
11050 (__v4si)
11051 _mm_setzero_si128 (),
11052 (__mmask8) __U);
11053}
11054
11055extern __inline __m128i
11056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11058{
11059 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11060 (__v2di) __B, __imm,
11061 (__v2di)
11062 _mm_setzero_di (),
11063 (__mmask8) -1);
11064}
11065
11066extern __inline __m128i
11067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11069 __m128i __B, const int __imm)
11070{
11071 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11072 (__v2di) __B, __imm,
11073 (__v2di) __W,
11074 (__mmask8) __U);
11075}
11076
11077extern __inline __m128i
11078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11079_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11080 const int __imm)
11081{
11082 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11083 (__v2di) __B, __imm,
11084 (__v2di)
11085 _mm_setzero_di (),
11086 (__mmask8) __U);
11087}
11088
11089extern __inline __m256i
11090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11091_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11092{
11093 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11094 (__v8si) __B, __imm,
11095 (__v8si)
11096 _mm256_setzero_si256 (),
11097 (__mmask8) -1);
11098}
11099
11100extern __inline __m256i
11101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11102_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11103 __m256i __B, const int __imm)
11104{
11105 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11106 (__v8si) __B, __imm,
11107 (__v8si) __W,
11108 (__mmask8) __U);
11109}
11110
11111extern __inline __m256i
11112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11113_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11114 const int __imm)
11115{
11116 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11117 (__v8si) __B, __imm,
11118 (__v8si)
11119 _mm256_setzero_si256 (),
11120 (__mmask8) __U);
11121}
11122
11123extern __inline __m256i
11124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11125_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11126{
11127 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11128 (__v4di) __B, __imm,
11129 (__v4di)
11130 _mm256_setzero_si256 (),
11131 (__mmask8) -1);
11132}
11133
11134extern __inline __m256i
11135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11136_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11137 __m256i __B, const int __imm)
11138{
11139 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11140 (__v4di) __B, __imm,
11141 (__v4di) __W,
11142 (__mmask8) __U);
11143}
11144
11145extern __inline __m256i
11146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11148 const int __imm)
11149{
11150 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11151 (__v4di) __B, __imm,
11152 (__v4di)
11153 _mm256_setzero_si256 (),
11154 (__mmask8) __U);
11155}
11156
11157extern __inline __m128i
11158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11160 const int __I)
11161{
11162 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11163 (__v8hi) __W,
11164 (__mmask8) __U);
11165}
11166
11167extern __inline __m128i
11168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11170{
11171 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11172 (__v8hi)
11173 _mm_setzero_hi (),
11174 (__mmask8) __U);
11175}
11176
11177extern __inline __m128i
11178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11179_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11180 const int __I)
11181{
11182 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11183 (__v8hi) __W,
11184 (__mmask8) __U);
11185}
11186
11187extern __inline __m128i
11188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11189_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11190{
11191 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11192 (__v8hi)
11193 _mm_setzero_hi (),
11194 (__mmask8) __U);
11195}
11196
11197extern __inline __m256i
11198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11199_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11200 const int __imm)
11201{
11202 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11203 (__v8si) __W,
11204 (__mmask8) __U);
11205}
11206
11207extern __inline __m256i
11208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11209_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11210{
11211 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11212 (__v8si)
11213 _mm256_setzero_si256 (),
11214 (__mmask8) __U);
11215}
11216
11217extern __inline __m128i
11218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11219_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11220 const int __imm)
11221{
11222 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11223 (__v4si) __W,
11224 (__mmask8) __U);
11225}
11226
11227extern __inline __m128i
11228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11229_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11230{
11231 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11232 (__v4si)
11233 _mm_setzero_si128 (),
11234 (__mmask8) __U);
11235}
11236
11237extern __inline __m256i
11238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11239_mm256_srai_epi64 (__m256i __A, const int __imm)
11240{
11241 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11242 (__v4di)
11243 _mm256_setzero_si256 (),
11244 (__mmask8) -1);
11245}
11246
11247extern __inline __m256i
11248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11249_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11250 const int __imm)
11251{
11252 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11253 (__v4di) __W,
11254 (__mmask8) __U);
11255}
11256
11257extern __inline __m256i
11258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11259_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11260{
11261 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11262 (__v4di)
11263 _mm256_setzero_si256 (),
11264 (__mmask8) __U);
11265}
11266
11267extern __inline __m128i
11268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11269_mm_srai_epi64 (__m128i __A, const int __imm)
11270{
11271 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11272 (__v2di)
11273 _mm_setzero_di (),
11274 (__mmask8) -1);
11275}
11276
11277extern __inline __m128i
11278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11279_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11280 const int __imm)
11281{
11282 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11283 (__v2di) __W,
11284 (__mmask8) __U);
11285}
11286
11287extern __inline __m128i
11288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11289_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11290{
11291 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11292 (__v2di)
11293 _mm_setzero_si128 (),
11294 (__mmask8) __U);
11295}
11296
11297extern __inline __m128i
11298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11299_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11300{
11301 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11302 (__v4si) __W,
11303 (__mmask8) __U);
11304}
11305
11306extern __inline __m128i
11307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11308_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11309{
11310 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11311 (__v4si)
11312 _mm_setzero_si128 (),
11313 (__mmask8) __U);
11314}
11315
11316extern __inline __m128i
11317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11318_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11319{
11320 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11321 (__v2di) __W,
11322 (__mmask8) __U);
11323}
11324
11325extern __inline __m128i
11326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11327_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
11328{
11329 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11330 (__v2di)
11331 _mm_setzero_di (),
11332 (__mmask8) __U);
11333}
11334
11335extern __inline __m256i
11336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11337_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11338 int __B)
11339{
11340 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11341 (__v8si) __W,
11342 (__mmask8) __U);
11343}
11344
11345extern __inline __m256i
11346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11347_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
11348{
11349 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11350 (__v8si)
11351 _mm256_setzero_si256 (),
11352 (__mmask8) __U);
11353}
11354
11355extern __inline __m256i
11356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11358 int __B)
11359{
11360 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11361 (__v4di) __W,
11362 (__mmask8) __U);
11363}
11364
11365extern __inline __m256i
11366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11367_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
11368{
11369 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11370 (__v4di)
11371 _mm256_setzero_si256 (),
11372 (__mmask8) __U);
11373}
11374
11375extern __inline __m256d
11376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11377_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
11378 const int __imm)
11379{
11380 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11381 (__v4df) __W,
11382 (__mmask8) __U);
11383}
11384
11385extern __inline __m256d
11386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11387_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
11388{
11389 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11390 (__v4df)
11391 _mm256_setzero_pd (),
11392 (__mmask8) __U);
11393}
11394
11395extern __inline __m256d
11396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11397_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
11398 const int __C)
11399{
11400 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11401 (__v4df) __W,
11402 (__mmask8) __U);
11403}
11404
11405extern __inline __m256d
11406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11407_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
11408{
11409 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11410 (__v4df)
11411 _mm256_setzero_pd (),
11412 (__mmask8) __U);
11413}
11414
11415extern __inline __m128d
11416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11417_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
11418 const int __C)
11419{
11420 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11421 (__v2df) __W,
11422 (__mmask8) __U);
11423}
11424
11425extern __inline __m128d
11426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11427_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
11428{
11429 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11430 (__v2df)
11431 _mm_setzero_pd (),
11432 (__mmask8) __U);
11433}
11434
11435extern __inline __m256
11436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11437_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
11438 const int __C)
11439{
11440 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11441 (__v8sf) __W,
11442 (__mmask8) __U);
11443}
11444
11445extern __inline __m256
11446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11447_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
11448{
11449 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11450 (__v8sf)
11451 _mm256_setzero_ps (),
11452 (__mmask8) __U);
11453}
11454
11455extern __inline __m128
11456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11457_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
11458 const int __C)
11459{
11460 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11461 (__v4sf) __W,
11462 (__mmask8) __U);
11463}
11464
11465extern __inline __m128
11466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11467_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
11468{
11469 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11470 (__v4sf)
11471 _mm_setzero_ps (),
11472 (__mmask8) __U);
11473}
11474
11475extern __inline __m256d
11476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
11478{
11479 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
11480 (__v4df) __W,
11481 (__mmask8) __U);
11482}
11483
11484extern __inline __m256
11485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11486_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
11487{
11488 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
11489 (__v8sf) __W,
11490 (__mmask8) __U);
11491}
11492
11493extern __inline __m256i
11494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11495_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
11496{
11497 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
11498 (__v4di) __W,
11499 (__mmask8) __U);
11500}
11501
11502extern __inline __m256i
11503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11504_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
11505{
11506 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
11507 (__v8si) __W,
11508 (__mmask8) __U);
11509}
11510
11511extern __inline __m128d
11512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11513_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
11514{
11515 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
11516 (__v2df) __W,
11517 (__mmask8) __U);
11518}
11519
11520extern __inline __m128
11521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
11523{
11524 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
11525 (__v4sf) __W,
11526 (__mmask8) __U);
11527}
11528
11529extern __inline __m128i
11530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11531_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
11532{
11533 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
11534 (__v2di) __W,
11535 (__mmask8) __U);
11536}
11537
11538extern __inline __m128i
11539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11540_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
11541{
11542 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
11543 (__v4si) __W,
11544 (__mmask8) __U);
11545}
11546
11547extern __inline __mmask8
11548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11549_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
11550{
11551 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11552 (__v4di) __Y, __P,
11553 (__mmask8) -1);
11554}
11555
11556extern __inline __mmask8
11557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11558_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
11559{
11560 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11561 (__v8si) __Y, __P,
11562 (__mmask8) -1);
11563}
11564
11565extern __inline __mmask8
11566__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11567_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
11568{
11569 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11570 (__v4di) __Y, __P,
11571 (__mmask8) -1);
11572}
11573
11574extern __inline __mmask8
11575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11576_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
11577{
11578 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11579 (__v8si) __Y, __P,
11580 (__mmask8) -1);
11581}
11582
11583extern __inline __mmask8
11584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11585_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
11586{
11587 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11588 (__v4df) __Y, __P,
11589 (__mmask8) -1);
11590}
11591
11592extern __inline __mmask8
11593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11594_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
11595{
11596 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11597 (__v8sf) __Y, __P,
11598 (__mmask8) -1);
11599}
11600
11601extern __inline __mmask8
11602__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11603_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11604 const int __P)
11605{
11606 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11607 (__v4di) __Y, __P,
11608 (__mmask8) __U);
11609}
11610
11611extern __inline __mmask8
11612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11613_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11614 const int __P)
11615{
11616 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11617 (__v8si) __Y, __P,
11618 (__mmask8) __U);
11619}
11620
11621extern __inline __mmask8
11622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11623_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11624 const int __P)
11625{
11626 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11627 (__v4di) __Y, __P,
11628 (__mmask8) __U);
11629}
11630
11631extern __inline __mmask8
11632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11634 const int __P)
11635{
11636 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11637 (__v8si) __Y, __P,
11638 (__mmask8) __U);
11639}
11640
11641extern __inline __mmask8
11642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11643_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
11644 const int __P)
11645{
11646 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11647 (__v4df) __Y, __P,
11648 (__mmask8) __U);
11649}
11650
11651extern __inline __mmask8
11652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
11654 const int __P)
11655{
11656 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11657 (__v8sf) __Y, __P,
11658 (__mmask8) __U);
11659}
11660
11661extern __inline __mmask8
11662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11663_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
11664{
11665 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11666 (__v2di) __Y, __P,
11667 (__mmask8) -1);
11668}
11669
11670extern __inline __mmask8
11671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
11673{
11674 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11675 (__v4si) __Y, __P,
11676 (__mmask8) -1);
11677}
11678
11679extern __inline __mmask8
11680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11681_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
11682{
11683 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11684 (__v2di) __Y, __P,
11685 (__mmask8) -1);
11686}
11687
11688extern __inline __mmask8
11689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11690_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
11691{
11692 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11693 (__v4si) __Y, __P,
11694 (__mmask8) -1);
11695}
11696
11697extern __inline __mmask8
11698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
11700{
11701 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11702 (__v2df) __Y, __P,
11703 (__mmask8) -1);
11704}
11705
11706extern __inline __mmask8
11707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11708_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
11709{
11710 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11711 (__v4sf) __Y, __P,
11712 (__mmask8) -1);
11713}
11714
11715extern __inline __mmask8
11716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11717_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11718 const int __P)
11719{
11720 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11721 (__v2di) __Y, __P,
11722 (__mmask8) __U);
11723}
11724
11725extern __inline __mmask8
11726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11727_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11728 const int __P)
11729{
11730 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11731 (__v4si) __Y, __P,
11732 (__mmask8) __U);
11733}
11734
11735extern __inline __mmask8
11736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11737_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11738 const int __P)
11739{
11740 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11741 (__v2di) __Y, __P,
11742 (__mmask8) __U);
11743}
11744
11745extern __inline __mmask8
11746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11747_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11748 const int __P)
11749{
11750 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11751 (__v4si) __Y, __P,
11752 (__mmask8) __U);
11753}
11754
11755extern __inline __mmask8
11756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11757_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
11758 const int __P)
11759{
11760 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11761 (__v2df) __Y, __P,
11762 (__mmask8) __U);
11763}
11764
11765extern __inline __mmask8
11766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11767_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
11768 const int __P)
11769{
11770 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11771 (__v4sf) __Y, __P,
11772 (__mmask8) __U);
11773}
11774
11775extern __inline __m256d
11776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11777_mm256_permutex_pd (__m256d __X, const int __M)
11778{
11779 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
11780 (__v4df)
11781 _mm256_undefined_pd (),
11782 (__mmask8) -1);
11783}
11784
eee5d6f5
AI
11785extern __inline __mmask8
11786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11787_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11788{
11789 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11790 (__v8si) __Y, 4,
11791 (__mmask8) __M);
11792}
11793
936c0fe4
AI
11794extern __inline __mmask8
11795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11796_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
11797{
11798 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11799 (__v8si) __Y, 4,
11800 (__mmask8) - 1);
11801}
11802
eee5d6f5
AI
11803extern __inline __mmask8
11804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11805_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11806{
11807 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11808 (__v8si) __Y, 1,
11809 (__mmask8) __M);
11810}
11811
936c0fe4
AI
11812extern __inline __mmask8
11813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
11815{
11816 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11817 (__v8si) __Y, 1,
11818 (__mmask8) - 1);
11819}
11820
eee5d6f5
AI
11821extern __inline __mmask8
11822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11823_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11824{
11825 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11826 (__v8si) __Y, 5,
11827 (__mmask8) __M);
11828}
11829
936c0fe4
AI
11830extern __inline __mmask8
11831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11832_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
11833{
11834 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11835 (__v8si) __Y, 5,
11836 (__mmask8) - 1);
11837}
11838
eee5d6f5
AI
11839extern __inline __mmask8
11840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11841_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11842{
11843 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11844 (__v8si) __Y, 2,
11845 (__mmask8) __M);
11846}
11847
936c0fe4
AI
11848extern __inline __mmask8
11849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11850_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
11851{
11852 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11853 (__v8si) __Y, 2,
11854 (__mmask8) - 1);
11855}
11856
eee5d6f5
AI
11857extern __inline __mmask8
11858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11859_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11860{
11861 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11862 (__v4di) __Y, 4,
11863 (__mmask8) __M);
11864}
11865
936c0fe4
AI
11866extern __inline __mmask8
11867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
11869{
11870 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11871 (__v4di) __Y, 4,
11872 (__mmask8) - 1);
11873}
11874
eee5d6f5
AI
11875extern __inline __mmask8
11876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11877_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11878{
11879 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11880 (__v4di) __Y, 1,
11881 (__mmask8) __M);
11882}
11883
936c0fe4
AI
11884extern __inline __mmask8
11885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11886_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
11887{
11888 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11889 (__v4di) __Y, 1,
11890 (__mmask8) - 1);
11891}
11892
eee5d6f5
AI
11893extern __inline __mmask8
11894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11895_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11896{
11897 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11898 (__v4di) __Y, 5,
11899 (__mmask8) __M);
11900}
11901
936c0fe4
AI
11902extern __inline __mmask8
11903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11904_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
11905{
11906 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11907 (__v4di) __Y, 5,
11908 (__mmask8) - 1);
11909}
11910
eee5d6f5
AI
11911extern __inline __mmask8
11912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11913_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11914{
11915 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11916 (__v4di) __Y, 2,
11917 (__mmask8) __M);
11918}
11919
936c0fe4
AI
11920extern __inline __mmask8
11921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11922_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
11923{
11924 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11925 (__v4di) __Y, 2,
11926 (__mmask8) - 1);
11927}
11928
eee5d6f5
AI
11929extern __inline __mmask8
11930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11931_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11932{
11933 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11934 (__v8si) __Y, 4,
11935 (__mmask8) __M);
11936}
11937
936c0fe4
AI
11938extern __inline __mmask8
11939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
11941{
11942 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11943 (__v8si) __Y, 4,
11944 (__mmask8) - 1);
11945}
11946
eee5d6f5
AI
11947extern __inline __mmask8
11948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11949_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11950{
11951 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11952 (__v8si) __Y, 1,
11953 (__mmask8) __M);
11954}
11955
936c0fe4
AI
11956extern __inline __mmask8
11957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
11959{
11960 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11961 (__v8si) __Y, 1,
11962 (__mmask8) - 1);
11963}
11964
eee5d6f5
AI
11965extern __inline __mmask8
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11968{
11969 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11970 (__v8si) __Y, 5,
11971 (__mmask8) __M);
11972}
11973
936c0fe4
AI
11974extern __inline __mmask8
11975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11976_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
11977{
11978 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11979 (__v8si) __Y, 5,
11980 (__mmask8) - 1);
11981}
11982
eee5d6f5
AI
11983extern __inline __mmask8
11984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11985_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11986{
11987 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11988 (__v8si) __Y, 2,
11989 (__mmask8) __M);
11990}
11991
936c0fe4
AI
11992extern __inline __mmask8
11993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11994_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
11995{
11996 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11997 (__v8si) __Y, 2,
11998 (__mmask8) - 1);
11999}
12000
eee5d6f5
AI
12001extern __inline __mmask8
12002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12004{
12005 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12006 (__v4di) __Y, 4,
12007 (__mmask8) __M);
12008}
12009
936c0fe4
AI
12010extern __inline __mmask8
12011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12012_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
12013{
12014 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12015 (__v4di) __Y, 4,
12016 (__mmask8) - 1);
12017}
12018
eee5d6f5
AI
12019extern __inline __mmask8
12020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12021_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12022{
12023 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12024 (__v4di) __Y, 1,
12025 (__mmask8) __M);
12026}
12027
936c0fe4
AI
12028extern __inline __mmask8
12029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12030_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
12031{
12032 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12033 (__v4di) __Y, 1,
12034 (__mmask8) - 1);
12035}
12036
eee5d6f5
AI
12037extern __inline __mmask8
12038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12039_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12040{
12041 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12042 (__v4di) __Y, 5,
12043 (__mmask8) __M);
12044}
12045
936c0fe4
AI
12046extern __inline __mmask8
12047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12048_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
12049{
12050 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12051 (__v4di) __Y, 5,
12052 (__mmask8) - 1);
12053}
12054
eee5d6f5
AI
12055extern __inline __mmask8
12056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12057_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12058{
12059 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12060 (__v4di) __Y, 2,
12061 (__mmask8) __M);
12062}
12063
936c0fe4
AI
12064extern __inline __mmask8
12065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12066_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
12067{
12068 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12069 (__v4di) __Y, 2,
12070 (__mmask8) - 1);
12071}
12072
eee5d6f5
AI
12073extern __inline __mmask8
12074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12075_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12076{
12077 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12078 (__v4si) __Y, 4,
12079 (__mmask8) __M);
12080}
12081
936c0fe4
AI
12082extern __inline __mmask8
12083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
12085{
12086 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12087 (__v4si) __Y, 4,
12088 (__mmask8) - 1);
12089}
12090
eee5d6f5
AI
12091extern __inline __mmask8
12092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12093_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12094{
12095 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12096 (__v4si) __Y, 1,
12097 (__mmask8) __M);
12098}
12099
936c0fe4
AI
12100extern __inline __mmask8
12101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12102_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
12103{
12104 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12105 (__v4si) __Y, 1,
12106 (__mmask8) - 1);
12107}
12108
eee5d6f5
AI
12109extern __inline __mmask8
12110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12111_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12112{
12113 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12114 (__v4si) __Y, 5,
12115 (__mmask8) __M);
12116}
12117
936c0fe4
AI
12118extern __inline __mmask8
12119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12120_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
12121{
12122 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12123 (__v4si) __Y, 5,
12124 (__mmask8) - 1);
12125}
12126
eee5d6f5
AI
12127extern __inline __mmask8
12128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12129_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12130{
12131 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12132 (__v4si) __Y, 2,
12133 (__mmask8) __M);
12134}
12135
936c0fe4
AI
12136extern __inline __mmask8
12137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12138_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
12139{
12140 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12141 (__v4si) __Y, 2,
12142 (__mmask8) - 1);
12143}
12144
eee5d6f5
AI
12145extern __inline __mmask8
12146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12147_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12148{
12149 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12150 (__v2di) __Y, 4,
12151 (__mmask8) __M);
12152}
12153
936c0fe4
AI
12154extern __inline __mmask8
12155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12156_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
12157{
12158 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12159 (__v2di) __Y, 4,
12160 (__mmask8) - 1);
12161}
12162
eee5d6f5
AI
12163extern __inline __mmask8
12164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12165_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12166{
12167 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12168 (__v2di) __Y, 1,
12169 (__mmask8) __M);
12170}
12171
936c0fe4
AI
12172extern __inline __mmask8
12173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12174_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
12175{
12176 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12177 (__v2di) __Y, 1,
12178 (__mmask8) - 1);
12179}
12180
eee5d6f5
AI
12181extern __inline __mmask8
12182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12183_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12184{
12185 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12186 (__v2di) __Y, 5,
12187 (__mmask8) __M);
12188}
12189
936c0fe4
AI
12190extern __inline __mmask8
12191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12192_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
12193{
12194 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12195 (__v2di) __Y, 5,
12196 (__mmask8) - 1);
12197}
12198
eee5d6f5
AI
12199extern __inline __mmask8
12200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12201_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12202{
12203 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12204 (__v2di) __Y, 2,
12205 (__mmask8) __M);
12206}
12207
936c0fe4
AI
12208extern __inline __mmask8
12209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12210_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
12211{
12212 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12213 (__v2di) __Y, 2,
12214 (__mmask8) - 1);
12215}
12216
eee5d6f5
AI
12217extern __inline __mmask8
12218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12219_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12220{
12221 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12222 (__v4si) __Y, 4,
12223 (__mmask8) __M);
12224}
12225
936c0fe4
AI
12226extern __inline __mmask8
12227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12228_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
12229{
12230 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12231 (__v4si) __Y, 4,
12232 (__mmask8) - 1);
12233}
12234
eee5d6f5
AI
12235extern __inline __mmask8
12236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12237_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12238{
12239 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12240 (__v4si) __Y, 1,
12241 (__mmask8) __M);
12242}
12243
936c0fe4
AI
12244extern __inline __mmask8
12245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12246_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
12247{
12248 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12249 (__v4si) __Y, 1,
12250 (__mmask8) - 1);
12251}
12252
eee5d6f5
AI
12253extern __inline __mmask8
12254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12255_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12256{
12257 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12258 (__v4si) __Y, 5,
12259 (__mmask8) __M);
12260}
12261
936c0fe4
AI
12262extern __inline __mmask8
12263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12264_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
12265{
12266 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12267 (__v4si) __Y, 5,
12268 (__mmask8) - 1);
12269}
12270
eee5d6f5
AI
12271extern __inline __mmask8
12272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12273_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12274{
12275 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12276 (__v4si) __Y, 2,
12277 (__mmask8) __M);
12278}
12279
936c0fe4
AI
12280extern __inline __mmask8
12281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
12283{
12284 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12285 (__v4si) __Y, 2,
12286 (__mmask8) - 1);
12287}
12288
eee5d6f5
AI
12289extern __inline __mmask8
12290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12291_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12292{
12293 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12294 (__v2di) __Y, 4,
12295 (__mmask8) __M);
12296}
12297
936c0fe4
AI
12298extern __inline __mmask8
12299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
12301{
12302 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12303 (__v2di) __Y, 4,
12304 (__mmask8) - 1);
12305}
12306
eee5d6f5
AI
12307extern __inline __mmask8
12308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12309_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12310{
12311 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12312 (__v2di) __Y, 1,
12313 (__mmask8) __M);
12314}
12315
936c0fe4
AI
12316extern __inline __mmask8
12317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12318_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
12319{
12320 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12321 (__v2di) __Y, 1,
12322 (__mmask8) - 1);
12323}
12324
eee5d6f5
AI
12325extern __inline __mmask8
12326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12327_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12328{
12329 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12330 (__v2di) __Y, 5,
12331 (__mmask8) __M);
12332}
12333
936c0fe4
AI
12334extern __inline __mmask8
12335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12336_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
12337{
12338 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12339 (__v2di) __Y, 5,
12340 (__mmask8) - 1);
12341}
12342
eee5d6f5
AI
12343extern __inline __mmask8
12344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12346{
12347 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12348 (__v2di) __Y, 2,
12349 (__mmask8) __M);
12350}
12351
936c0fe4
AI
12352extern __inline __mmask8
12353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12354_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
12355{
12356 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12357 (__v2di) __Y, 2,
12358 (__mmask8) - 1);
12359}
12360
12361#else
/* Macro forms of the 256-bit permutex intrinsics.  Each expands to one
   __builtin_ia32_perm{df,di}256_mask call with the selector cast to int.
   The unmasked pd form passes _mm256_undefined_pd () and an all-ones
   mask; the maskz form passes a zero vector; the mask form passes W.  */
#define _mm256_permutex_pd(X, M) \
  ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d) (X), \
      (int) (M), (__v4df)(__m256d) _mm256_undefined_pd (), \
      (__mmask8) -1))

#define _mm256_maskz_permutex_epi64(M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i) (X), \
      (int) (I), (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
      (__mmask8) (M)))

#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i) (X), \
      (int) (I), (__v4di)(__m256i) (W), \
      (__mmask8) (M)))
12379
/* Macro forms of the 128-bit-lane insert intrinsics for 256-bit vectors.
   X is the 256-bit operand, Y the 128-bit operand and C the immediate
   (cast to int).  Unmasked and maskz forms pass a zero vector as the
   fourth builtin operand; mask forms pass W.  */
#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
      (__v4sf)(__m128) (Y), (int) (C), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8) -1))

#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
      (__v4sf)(__m128) (Y), (int) (C), \
      (__v8sf)(__m256) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
      (__v4sf)(__m128) (Y), (int) (C), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8) (U)))

#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X), \
      (__v4si)(__m128i) (Y), (int) (C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) -1))

#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X), \
      (__v4si)(__m128i) (Y), (int) (C), \
      (__v8si)(__m256i) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_inserti32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X), \
      (__v4si)(__m128i) (Y), (int) (C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) (U)))
12415
/* Macro forms of the 128-bit-lane extract intrinsics for 256-bit vectors.
   X is the 256-bit source and C the immediate (cast to int).  Unmasked
   and maskz forms pass a zero 128-bit vector as the third builtin
   operand; mask forms pass W.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
      (int) (C), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8) -1))

#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
      (int) (C), \
      (__v4sf)(__m128) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
      (int) (C), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8) (U)))

#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X), \
      (int) (C), \
      (__v4si)(__m128i) _mm_setzero_si128 (), \
      (__mmask8) -1))

#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X), \
      (int) (C), \
      (__v4si)(__m128i) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X), \
      (int) (C), \
      (__v4si)(__m128i) _mm_setzero_si128 (), \
      (__mmask8) (U)))
12445
/* Macro forms of the 256-bit two-source lane shuffles
   (i64x2, i32x4, f64x2, f32x4).  X and Y are the sources and C the
   immediate (cast to int).  Unmasked and maskz forms pass a zero vector
   as the fourth builtin operand; mask forms pass W.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i) (X), \
      (__v4di)(__m256i) (Y), (int) (C), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) -1))

#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i) (X), \
      (__v4di)(__m256i) (Y), (int) (C), \
      (__v4di)(__m256i) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i) (X), \
      (__v4di)(__m256i) (Y), (int) (C), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) (U)))

#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i) (X), \
      (__v8si)(__m256i) (Y), (int) (C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) -1))

#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i) (X), \
      (__v8si)(__m256i) (Y), (int) (C), \
      (__v8si)(__m256i) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i) (X), \
      (__v8si)(__m256i) (Y), (int) (C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) (U)))

#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d) (X), \
      (__v4df)(__m256d) (Y), (int) (C), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8) -1))

#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d) (X), \
      (__v4df)(__m256d) (Y), (int) (C), \
      (__v4df)(__m256d) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d) (X), \
      (__v4df)(__m256d) (Y), (int) (C), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8) (U)))

#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256) (X), \
      (__v8sf)(__m256) (Y), (int) (C), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8) -1))

#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256) (X), \
      (__v8sf)(__m256) (Y), (int) (C), \
      (__v8sf)(__m256) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256) (X), \
      (__v8sf)(__m256) (Y), (int) (C), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8) (U)))
12517
/* Macro forms of the masked shuffle_pd / shuffle_ps intrinsics for
   256-bit and 128-bit vectors.  A and B are the sources and C the
   immediate (cast to int).  mask forms pass W as the fourth builtin
   operand; maskz forms pass a zero vector.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ((__v4df)(__m256d) (A), \
      (__v4df)(__m256d) (B), (int) (C), \
      (__v4df)(__m256d) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ((__v4df)(__m256d) (A), \
      (__v4df)(__m256d) (B), (int) (C), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8) (U)))

#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ((__v2df)(__m128d) (A), \
      (__v2df)(__m128d) (B), (int) (C), \
      (__v2df)(__m128d) (W), \
      (__mmask8) (U)))

#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ((__v2df)(__m128d) (A), \
      (__v2df)(__m128d) (B), (int) (C), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8) (U)))

#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256) (A), \
      (__v8sf)(__m256) (B), (int) (C), \
      (__v8sf)(__m256) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_shuffle_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256) (A), \
      (__v8sf)(__m256) (B), (int) (C), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8) (U)))

#define _mm_mask_shuffle_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128) (A), \
      (__v4sf)(__m128) (B), (int) (C), \
      (__v4sf)(__m128) (W), \
      (__mmask8) (U)))

#define _mm_maskz_shuffle_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128) (A), \
      (__v4sf)(__m128) (B), (int) (C), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8) (U)))
12565
/* Macro forms of the fixupimm intrinsics (pd/ps, 256-bit and 128-bit).
   X and Y are floating-point vectors, Z is an integer vector and C the
   immediate (cast to int).  Unmasked and mask forms call the *_mask
   builtin (with an all-ones mask or U respectively); maskz forms call
   the separate *_maskz builtin.  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d) (X), \
      (__v4df)(__m256d) (Y), \
      (__v4di)(__m256i) (Z), (int) (C), \
      (__mmask8) (-1)))

#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d) (X), \
      (__v4df)(__m256d) (Y), \
      (__v4di)(__m256i) (Z), (int) (C), \
      (__mmask8) (U)))

#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d) (X), \
      (__v4df)(__m256d) (Y), \
      (__v4di)(__m256i) (Z), (int) (C), \
      (__mmask8) (U)))

#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256) (X), \
      (__v8sf)(__m256) (Y), \
      (__v8si)(__m256i) (Z), (int) (C), \
      (__mmask8) (-1)))

#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256) (X), \
      (__v8sf)(__m256) (Y), \
      (__v8si)(__m256i) (Z), (int) (C), \
      (__mmask8) (U)))

#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256) (X), \
      (__v8sf)(__m256) (Y), \
      (__v8si)(__m256i) (Z), (int) (C), \
      (__mmask8) (U)))

#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d) (X), \
      (__v2df)(__m128d) (Y), \
      (__v2di)(__m128i) (Z), (int) (C), \
      (__mmask8) (-1)))

#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d) (X), \
      (__v2df)(__m128d) (Y), \
      (__v2di)(__m128i) (Z), (int) (C), \
      (__mmask8) (U)))

#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d) (X), \
      (__v2df)(__m128d) (Y), \
      (__v2di)(__m128i) (Z), (int) (C), \
      (__mmask8) (U)))

#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128) (X), \
      (__v4sf)(__m128) (Y), \
      (__v4si)(__m128i) (Z), (int) (C), \
      (__mmask8) (-1)))

#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128) (X), \
      (__v4sf)(__m128) (Y), \
      (__v4si)(__m128i) (Z), (int) (C), \
      (__mmask8) (U)))

#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128) (X), \
      (__v4sf)(__m128) (Y), \
      (__v4si)(__m128i) (Z), (int) (C), \
      (__mmask8) (U)))
12639
/* Macro forms of the masked srli intrinsics (epi32/epi64, 256-bit and
   128-bit).  A is the source and B the immediate count (cast to int).
   mask forms pass W as the third builtin operand; maskz forms pass a
   zero vector.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i) (A), \
      (int) (B), \
      (__v8si)(__m256i) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i) (A), \
      (int) (B), \
      (__v8si) _mm256_setzero_si256 (), \
      (__mmask8) (U)))

#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i) (A), \
      (int) (B), \
      (__v4si)(__m128i) (W), \
      (__mmask8) (U)))

#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i) (A), \
      (int) (B), \
      (__v4si) _mm_setzero_si128 (), \
      (__mmask8) (U)))

#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i) (A), \
      (int) (B), \
      (__v4di)(__m256i) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i) (A), \
      (int) (B), \
      (__v4di) _mm256_setzero_si256 (), \
      (__mmask8) (U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i) (A), \
      (int) (B), \
      (__v2di)(__m128i) (W), \
      (__mmask8) (U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i) (A), \
      (int) (B), \
      (__v2di) _mm_setzero_si128 (), \
      (__mmask8) (U)))
12671
/* Macro forms of the masked slli intrinsics (epi32/epi64, 256-bit and
   128-bit).  X is the source and C the immediate count (cast to int).
   mask forms pass W as the third builtin operand; maskz forms pass a
   zero vector (_mm_setzero_di () for the 128-bit epi64 case).  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ((__v8si)(__m256i) (X), \
      (int) (C), \
      (__v8si)(__m256i) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ((__v8si)(__m256i) (X), \
      (int) (C), \
      (__v8si)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) (U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ((__v4di)(__m256i) (X), \
      (int) (C), \
      (__v4di)(__m256i) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ((__v4di)(__m256i) (X), \
      (int) (C), \
      (__v4di)(__m256i) _mm256_setzero_si256 (), \
      (__mmask8) (U)))

#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ((__v4si)(__m128i) (X), \
      (int) (C), \
      (__v4si)(__m128i) (W), \
      (__mmask8) (U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ((__v4si)(__m128i) (X), \
      (int) (C), \
      (__v4si)(__m128i) _mm_setzero_si128 (), \
      (__mmask8) (U)))

#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ((__v2di)(__m128i) (X), \
      (int) (C), \
      (__v2di)(__m128i) (W), \
      (__mmask8) (U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ((__v2di)(__m128i) (X), \
      (int) (C), \
      (__v2di)(__m128i) _mm_setzero_di (), \
      (__mmask8) (U)))
12711
/* Macro forms of the ternarylogic intrinsics (epi64/epi32, 256-bit and
   128-bit).  A, B and C are the three vector operands and I the
   immediate (cast to int).  Unmasked and mask forms call the *_mask
   builtin (with an all-ones mask or U respectively); maskz forms call
   the separate *_maskz builtin.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i) (A), \
      (__v4di)(__m256i) (B), \
      (__v4di)(__m256i) (C), \
      (int) (I), (__mmask8) -1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i) (A), \
      (__v4di)(__m256i) (B), \
      (__v4di)(__m256i) (C), \
      (int) (I), (__mmask8) (U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i) (A), \
      (__v4di)(__m256i) (B), \
      (__v4di)(__m256i) (C), \
      (int) (I), (__mmask8) (U)))

#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i) (A), \
      (__v8si)(__m256i) (B), \
      (__v8si)(__m256i) (C), \
      (int) (I), (__mmask8) -1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i) (A), \
      (__v8si)(__m256i) (B), \
      (__v8si)(__m256i) (C), \
      (int) (I), (__mmask8) (U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i) (A), \
      (__v8si)(__m256i) (B), \
      (__v8si)(__m256i) (C), \
      (int) (I), (__mmask8) (U)))

#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i) (A), \
      (__v2di)(__m128i) (B), \
      (__v2di)(__m128i) (C), \
      (int) (I), (__mmask8) -1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i) (A), \
      (__v2di)(__m128i) (B), \
      (__v2di)(__m128i) (C), \
      (int) (I), (__mmask8) (U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i) (A), \
      (__v2di)(__m128i) (B), \
      (__v2di)(__m128i) (C), \
      (int) (I), (__mmask8) (U)))

#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i) (A), \
      (__v4si)(__m128i) (B), \
      (__v4si)(__m128i) (C), \
      (int) (I), (__mmask8) -1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i) (A), \
      (__v4si)(__m128i) (B), \
      (__v4si)(__m128i) (C), \
      (int) (I), (__mmask8) (U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i) (A), \
      (__v4si)(__m128i) (B), \
      (__v4si)(__m128i) (C), \
      (int) (I), (__mmask8) (U)))
12759
/* Macro forms of the roundscale intrinsics (ps/pd, 256-bit and 128-bit).
   A is the source and B the immediate (cast to int).  Unmasked and
   maskz forms pass a zero vector as the third builtin operand; mask
   forms pass W.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256) (A), \
      (int) (B), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8) -1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256) (A), \
      (int) (B), \
      (__v8sf)(__m256) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256) (A), \
      (int) (B), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8) (U)))

#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d) (A), \
      (int) (B), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8) -1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d) (A), \
      (int) (B), \
      (__v4df)(__m256d) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d) (A), \
      (int) (B), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8) (U)))

#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128) (A), \
      (int) (B), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8) -1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128) (A), \
      (int) (B), \
      (__v4sf)(__m128) (W), \
      (__mmask8) (U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128) (A), \
      (int) (B), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8) (U)))

#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d) (A), \
      (int) (B), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8) -1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d) (A), \
      (int) (B), \
      (__v2df)(__m128d) (W), \
      (__mmask8) (U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d) (A), \
      (int) (B), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8) (U)))
12807
/* Macro forms of the getmant intrinsics (ps/pd, 256-bit and 128-bit).
   X is the source; B and C are combined into one int immediate as
   ((C) << 2) | (B).  Unmasked and maskz forms pass a zero vector as the
   third builtin operand; mask forms pass W.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8) -1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8sf)(__m256) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
      (int) (((C) << 2) | (B)), \
      (__v8sf)(__m256) _mm256_setzero_ps (), \
      (__mmask8) (U)))

#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
      (int) (((C) << 2) | (B)), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8) -1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
      (int) (((C) << 2) | (B)), \
      (__v4sf)(__m128) (W), \
      (__mmask8) (U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
      (int) (((C) << 2) | (B)), \
      (__v4sf)(__m128) _mm_setzero_ps (), \
      (__mmask8) (U)))

#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8) -1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v4df)(__m256d) (W), \
      (__mmask8) (U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v4df)(__m256d) _mm256_setzero_pd (), \
      (__mmask8) (U)))

#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8) -1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v2df)(__m128d) (W), \
      (__mmask8) (U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
      (int) (((C) << 2) | (B)), \
      (__v2df)(__m128d) _mm_setzero_pd (), \
      (__mmask8) (U)))
12879
/* Macro forms of the masked floating-point gather intrinsics.  Each
   expands to one __builtin_ia32_gather3* call taking, in order: V1OLD
   (vector operand), ADDR (cast to a const element pointer), INDEX
   (index vector), MASK (cast to __mmask8) and SCALE (cast to int).

   Every macro argument is parenthesized before a cast is applied, so an
   argument such as `base + off' or `m1 | m2' is cast as a whole rather
   than having the cast bind to its first operand only.  The expansion
   itself is parenthesized so it behaves as a single primary
   expression.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256) (V1OLD), \
      (float const *) (ADDR), (__v8si)(__m256i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128) (V1OLD), \
      (float const *) (ADDR), (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d) (V1OLD), \
      (double const *) (ADDR), (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d) (V1OLD), \
      (double const *) (ADDR), (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD), \
      (float const *) (ADDR), (__v4di)(__m256i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD), \
      (float const *) (ADDR), (__v2di)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD), \
      (double const *) (ADDR), (__v4di)(__m256i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD), \
      (double const *) (ADDR), (__v2di)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))
12927
/* Macro forms of the masked integer gather intrinsics.  Each expands to
   one __builtin_ia32_gather3* call taking, in order: V1OLD (vector
   operand), ADDR (cast to a const element pointer), INDEX (index
   vector), MASK (cast to __mmask8) and SCALE (cast to int).

   Every macro argument is parenthesized before a cast is applied, so an
   argument such as `base + off' or `m1 | m2' is cast as a whole rather
   than having the cast bind to its first operand only.  The expansion
   itself is parenthesized so it behaves as a single primary
   expression.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD), \
      (int const *) (ADDR), (__v8si)(__m256i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD), \
      (int const *) (ADDR), (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD), \
      (long long const *) (ADDR), (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD), \
      (long long const *) (ADDR), (__v4si)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD), \
      (int const *) (ADDR), (__v4di)(__m256i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD), \
      (int const *) (ADDR), (__v2di)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD), \
      (long long const *) (ADDR), (__v4di)(__m256i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD), \
      (long long const *) (ADDR), (__v2di)(__m128i) (INDEX), \
      (__mmask8) (MASK), (int) (SCALE)))
12975
/* Macro forms of the floating-point scatter intrinsics.  Each expands
   to one __builtin_ia32_scatter* call taking, in order: ADDR (cast to
   an element pointer), a mask (the constant 0xFF for the unmasked
   forms, MASK for the _mask_ forms), INDEX (index vector), V1 (data
   vector) and SCALE (cast to int).

   Every macro argument is parenthesized before a cast is applied, so an
   argument such as `base + off' or `m1 | m2' is cast as a whole rather
   than having the cast bind to its first operand only.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((float *) (ADDR), (__mmask8) 0xFF, \
      (__v8si)(__m256i) (INDEX), (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((float *) (ADDR), (__mmask8) (MASK), \
      (__v8si)(__m256i) (INDEX), (__v8sf)(__m256) (V1), (int) (SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((float *) (ADDR), (__mmask8) 0xFF, \
      (__v4si)(__m128i) (INDEX), (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((float *) (ADDR), (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((double *) (ADDR), (__mmask8) 0xFF, \
      (__v4si)(__m128i) (INDEX), (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((double *) (ADDR), (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((double *) (ADDR), (__mmask8) 0xFF, \
      (__v4si)(__m128i) (INDEX), (__v2df)(__m128d) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((double *) (ADDR), (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), (__v2df)(__m128d) (V1), (int) (SCALE))

#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((float *) (ADDR), (__mmask8) 0xFF, \
      (__v4di)(__m256i) (INDEX), (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((float *) (ADDR), (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((float *) (ADDR), (__mmask8) 0xFF, \
      (__v2di)(__m128i) (INDEX), (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((float *) (ADDR), (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((double *) (ADDR), (__mmask8) 0xFF, \
      (__v4di)(__m256i) (INDEX), (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((double *) (ADDR), (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((double *) (ADDR), (__mmask8) 0xFF, \
      (__v2di)(__m128i) (INDEX), (__v2df)(__m128d) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((double *) (ADDR), (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), (__v2df)(__m128d) (V1), (int) (SCALE))
13055
13056#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
13057 __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)0xFF, \
13058 (__v8si)(__m256i)INDEX, \
13059 (__v8si)(__m256i)V1, (int)SCALE)
13060
13061#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13062 __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)MASK, \
13063 (__v8si)(__m256i)INDEX, \
13064 (__v8si)(__m256i)V1, (int)SCALE)
13065
13066#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
13067 __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)0xFF, \
13068 (__v4si)(__m128i)INDEX, \
13069 (__v4si)(__m128i)V1, (int)SCALE)
13070
13071#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
13072 __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)MASK, \
13073 (__v4si)(__m128i)INDEX, \
13074 (__v4si)(__m128i)V1, (int)SCALE)
13075
/* Wrappers around __builtin_ia32_scattersiv4di (256-bit data) and
   __builtin_ia32_scattersiv2di (128-bit data).  In both cases INDEX is
   converted to a __v4si vector.  The unmasked forms supply the constant
   mask 0xFF; the _mask forms forward MASK.
   Every macro argument is parenthesized before it is cast, so a compound
   argument (for instance `base + off' passed as ADDR) is converted as a
   whole instead of having the cast bind to its first operand only.  */
#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((long long *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((long long *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((long long *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((long long *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))
13095
/* Wrappers around __builtin_ia32_scatterdiv8si (__v4di index) and
   __builtin_ia32_scatterdiv4si (__v2di index).  In both cases V1 is
   converted to a __v4si vector.  The unmasked forms supply the constant
   mask 0xFF; the _mask forms forward MASK.
   Every macro argument is parenthesized before it is cast, so a compound
   argument (for instance `base + off' passed as ADDR) is converted as a
   whole instead of having the cast bind to its first operand only.  */
#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((int *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((int *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((int *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((int *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))
13115
/* Wrappers around __builtin_ia32_scatterdiv4di (256-bit) and
   __builtin_ia32_scatterdiv2di (128-bit).  The unmasked forms supply the
   constant mask 0xFF; the _mask forms forward MASK.
   Every macro argument is parenthesized before it is cast, so a compound
   argument (for instance `base + off' passed as ADDR) is converted as a
   whole instead of having the cast bind to its first operand only.  */
#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((long long *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((long long *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((long long *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((long long *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))
13135
/* Wrappers over __builtin_ia32_pshufd256_mask / __builtin_ia32_pshufd128_mask.
   Builtin operands, in order: X as a vector of 32-bit ints, C as int, a
   third vector operand (W for _mask, the result of the setzero helper for
   _maskz), and U as an 8-bit mask.  */
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si)(__m256i)(X), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si)(__m256i)(X), \
     (int)(C), \
     (__v8si)(__m256i)_mm256_setzero_si256(), \
     (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si)(__m128i)(X), \
     (int)(C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si)(__m128i)(X), \
     (int)(C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
13155
/* Wrappers over __builtin_ia32_prolq256_mask / __builtin_ia32_prolq128_mask.
   Builtin operands, in order: A as a vector of 64-bit ints, B as int, a
   third vector operand and an 8-bit mask.
     plain  : setzero helper result, mask (__mmask8)-1
     _mask  : W, mask U
     _maskz : setzero helper result, mask U  */
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prolq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)_mm_setzero_di(), \
     (__mmask8)-1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prolq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)_mm_setzero_di(), \
     (__mmask8)(U)))
13185
/* Wrappers over __builtin_ia32_prorq256_mask / __builtin_ia32_prorq128_mask.
   Builtin operands, in order: A as a vector of 64-bit ints, B as int, a
   third vector operand and an 8-bit mask.
     plain  : setzero helper result, mask (__mmask8)-1
     _mask  : W, mask U
     _maskz : setzero helper result, mask U  */
#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)-1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_prorq256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)_mm_setzero_di(), \
     (__mmask8)-1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_prorq128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)_mm_setzero_di(), \
     (__mmask8)(U)))
13215
/* Wrappers over __builtin_ia32_prold256_mask / __builtin_ia32_prold128_mask.
   Builtin operands, in order: A as a vector of 32-bit ints, B as int, a
   third vector operand and an 8-bit mask.
     plain  : setzero helper result, mask (__mmask8)-1
     _mask  : W, mask U
     _maskz : setzero helper result, mask U  */
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256(), \
     (__mmask8)-1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prold256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256(), \
     (__mmask8)(U)))

#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prold128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
13245
/* Wrappers over __builtin_ia32_prord256_mask / __builtin_ia32_prord128_mask.
   Builtin operands, in order: A as a vector of 32-bit ints, B as int, a
   third vector operand and an 8-bit mask.
     plain  : setzero helper result, mask (__mmask8)-1
     _mask  : W, mask U
     _maskz : setzero helper result, mask U  */
#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256(), \
     (__mmask8)-1))

#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_prord256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)_mm256_setzero_si256(), \
     (__mmask8)(U)))

#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)-1))

#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_prord128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)_mm_setzero_si128 (), \
     (__mmask8)(U)))
13275
/* Wrappers over __builtin_ia32_alignd256_mask.  Builtin operands, in
   order: X, Y (both as __v8si), C as int, a fourth vector operand and an
   8-bit mask.
     plain  : fourth operand is X again, mask (__mmask8)-1
     _mask  : fourth operand is W, mask U
     _maskz : fourth operand is _mm256_setzero_si256 (), mask U  */
#define _mm256_alignr_epi32(X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)(X), \
     (__mmask8)-1))

#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
13288
/* Wrappers over __builtin_ia32_alignq256_mask.  Builtin operands, in
   order: X, Y (both as __v4di), C as int, a fourth vector operand and an
   8-bit mask.
     plain  : fourth operand is X again, mask (__mmask8)-1
     _mask  : fourth operand is W, mask U
     _maskz : fourth operand is _mm256_setzero_si256 (), mask U  */
#define _mm256_alignr_epi64(X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)(X), \
     (__mmask8)-1))

#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), \
     (__mmask8)(U)))
13301
/* Wrappers over __builtin_ia32_alignd128_mask.  Builtin operands, in
   order: X, Y (both as __v4si), C as int, a fourth vector operand and an
   8-bit mask.
     plain  : fourth operand is X again, mask (__mmask8)-1
     _mask  : fourth operand is W, mask U
     _maskz : fourth operand is _mm_setzero_si128 (), mask U  */
#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ( \
     (__v4si)(__m128i)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(C), \
     (__v4si)(__m128i)(X), \
     (__mmask8)-1))

#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ( \
     (__v4si)(__m128i)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(C), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ( \
     (__v4si)(__m128i)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(C), \
     (__v4si)(__m128i)_mm_setzero_si128(), \
     (__mmask8)(U)))
13314
/* Unmasked wrapper over __builtin_ia32_alignq128_mask: X and Y are passed
   as __v2di, C as int, X is reused as the fourth operand and the mask is
   (__mmask8)-1.  */
#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ( \
     (__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), \
     (int)(C), \
     (__v2di)(__m128i)(X), \
     (__mmask8)-1))
13318
/* Masked wrapper over __builtin_ia32_alignq128_mask: X and Y are passed as
   __v2di, C as int, W as the fourth vector operand and U as the 8-bit
   mask.  This mirrors the other _mask_alignr_* macros in this file; the
   previous expansion dropped W and U and passed X with an all-ones mask,
   which made it identical to the unmasked _mm_alignr_epi64.  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
13322
/* _maskz wrapper over __builtin_ia32_alignq128_mask: X and Y are passed as
   __v2di, C as int, the result of _mm_setzero_si128 () as the fourth
   operand and U as the 8-bit mask.  */
#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ( \
     (__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), \
     (int)(C), \
     (__v2di)(__m128i)_mm_setzero_si128(), \
     (__mmask8)(U)))
13327
/* Wrappers over __builtin_ia32_vcvtps2ph_mask (A is a __v4sf) and
   __builtin_ia32_vcvtps2ph256_mask (A is a __v8sf).  Builtin operands, in
   order: A, I as int, a __v8hi vector (W for _mask, the result of
   _mm_setzero_hi () for _maskz) and U as an 8-bit mask.  All four macros
   produce a __m128i.
   A is parenthesized before it is cast, like every other argument, so a
   compound expression passed as A is converted as a whole.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))

#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
13343
/* Wrappers over __builtin_ia32_psradi256_mask / __builtin_ia32_psradi128_mask.
   Builtin operands, in order: A as a vector of 32-bit ints, B as int, a
   third vector operand (W for _mask, the setzero helper result for
   _maskz) and U as an 8-bit mask.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
     (__v8si)(__m256i)(A), \
     (int)(B), \
     (__v8si)_mm256_setzero_si256(), \
     (__mmask8)(U)))

#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
     (__v4si)(__m128i)(A), \
     (int)(B), \
     (__v4si)_mm_setzero_si128(), \
     (__mmask8)(U)))
13359
/* Wrappers over __builtin_ia32_psraqi256_mask / __builtin_ia32_psraqi128_mask.
   Builtin operands, in order: A as a vector of 64-bit ints, B as int, a
   third vector operand and an 8-bit mask.
     plain  : setzero helper result, mask (__mmask8)-1
     _mask  : W, mask U
     _maskz : setzero helper result, mask U  */
#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)_mm256_setzero_si256 (), \
     (__mmask8)-1))

#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)(__m256i)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
     (__v4di)(__m256i)(A), \
     (int)(B), \
     (__v4di)_mm256_setzero_si256 (), \
     (__mmask8)(U)))

#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)_mm_setzero_si128(), \
     (__mmask8)-1))

#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)(__m128i)(W), \
     (__mmask8)(U)))

#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
     (__v2di)(__m128i)(A), \
     (int)(B), \
     (__v2di)_mm_setzero_si128(), \
     (__mmask8)(U)))
13383
/* Wrappers over __builtin_ia32_permdf256_mask.  Builtin operands, in
   order: A as __v4df, B as int, a third __v4df operand (W for _mask, the
   result of _mm256_setzero_pd () for _maskz) and U as an 8-bit mask.  */
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df)(__m256d)(A), \
     (int)(B), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df)(__m256d)(A), \
     (int)(B), \
     (__v4df)(__m256d)_mm256_setzero_pd(), \
     (__mmask8)(U)))
13391
/* Wrappers over __builtin_ia32_vpermilpd256_mask (double, __v4df) and
   __builtin_ia32_vpermilps256_mask (float, __v8sf).  Builtin operands, in
   order: X, C as int, a third vector operand (W for _mask, the setzero
   helper result for _maskz) and U as an 8-bit mask.  */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (int)(C), \
     (__v4df)(__m256d)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
     (__v4df)(__m256d)(X), \
     (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd(), \
     (__mmask8)(U)))

#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
     (__v8sf)(__m256)(X), \
     (int)(C), \
     (__v8sf)(__m256)(W), \
     (__mmask8)(U)))

#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
     (__v8sf)(__m256)(X), \
     (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps(), \
     (__mmask8)(U)))
13410
/* Wrappers over __builtin_ia32_vpermilpd_mask (double, __v2df) and
   __builtin_ia32_vpermilps_mask (float, __v4sf).  Builtin operands, in
   order: X, C as int, a third vector operand (W for _mask, the setzero
   helper result for _maskz) and U as an 8-bit mask.  */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
     (__v2df)(__m128d)(X), \
     (int)(C), \
     (__v2df)(__m128d)(W), \
     (__mmask8)(U)))

#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
     (__v2df)(__m128d)(X), \
     (int)(C), \
     (__v2df)(__m128d)_mm_setzero_pd(), \
     (__mmask8)(U)))

#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
     (__v4sf)(__m128)(X), \
     (int)(C), \
     (__v4sf)(__m128)(W), \
     (__mmask8)(U)))

#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
     (__v4sf)(__m128)(X), \
     (int)(C), \
     (__v4sf)(__m128)_mm_setzero_ps(), \
     (__mmask8)(U)))
13428
/* 256-bit wrappers over the __builtin_ia32_blendm*_256_mask builtins.
   Each passes __A and __W, cast to the element-typed vector, followed by
   __U as an 8-bit mask, and casts the result back to the public vector
   type.  */
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
     (__v4df) (__A), \
     (__v4df) (__W), \
     (__mmask8) (__U)))

#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ( \
     (__v8sf) (__A), \
     (__v8sf) (__W), \
     (__mmask8) (__U)))

#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ( \
     (__v4di) (__A), \
     (__v4di) (__W), \
     (__mmask8) (__U)))

#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ( \
     (__v8si) (__A), \
     (__v8si) (__W), \
     (__mmask8) (__U)))
13448
/* 128-bit wrappers over the __builtin_ia32_blendm*_128_mask builtins.
   Each passes __A and __W, cast to the element-typed vector, followed by
   __U as an 8-bit mask, and casts the result back to the public vector
   type.  */
#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
     (__v2df) (__A), \
     (__v2df) (__W), \
     (__mmask8) (__U)))

#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ( \
     (__v4sf) (__A), \
     (__v4sf) (__W), \
     (__mmask8) (__U)))

#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ( \
     (__v2di) (__A), \
     (__v2di) (__W), \
     (__mmask8) (__U)))

#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ( \
     (__v4si) (__A), \
     (__v4si) (__W), \
     (__mmask8) (__U)))
13468
/* Unmasked 256-bit compare wrappers.  Each passes X and Y cast to the
   element-typed vector, P as int, and the constant mask (__mmask8)-1 to
   the corresponding __builtin_ia32_*cmp*256_mask builtin; the result is
   cast to __mmask8.  */
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(P), \
     (__mmask8)-1))
13498
/* Masked 256-bit compare wrappers.  Each passes X and Y cast to the
   element-typed vector, P as int, and M as an 8-bit mask to the
   corresponding __builtin_ia32_*cmp*256_mask builtin; the result is cast
   to __mmask8.  */
#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
     (__v8si)(__m256i)(X), \
     (__v8si)(__m256i)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
     (__v4df)(__m256d)(X), \
     (__v4df)(__m256d)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
     (__v8sf)(__m256)(X), \
     (__v8sf)(__m256)(Y), \
     (int)(P), \
     (__mmask8)(M)))
13528
/* Unmasked 128-bit compare wrappers.  Each passes X and Y cast to the
   element-typed vector, P as int, and the constant mask (__mmask8)-1 to
   the corresponding __builtin_ia32_*cmp*128_mask builtin; the result is
   cast to __mmask8.  */
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
     (__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
     (__v4si)(__m128i)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
     (__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
     (__v4si)(__m128i)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (int)(P), \
     (__mmask8)-1))

#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (int)(P), \
     (__mmask8)-1))
13558
/* Masked 128-bit compare wrappers.  Each passes X and Y cast to the
   element-typed vector, P as int, and M as an 8-bit mask to the
   corresponding __builtin_ia32_*cmp*128_mask builtin; the result is cast
   to __mmask8.  */
#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
     (__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
     (__v4si)(__m128i)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
     (__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
     (__v4si)(__m128i)(X), \
     (__v4si)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
     (__v2df)(__m128d)(X), \
     (__v2df)(__m128d)(Y), \
     (int)(P), \
     (__mmask8)(M)))

#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
     (__v4sf)(__m128)(X), \
     (__v4sf)(__m128)(Y), \
     (int)(P), \
     (__mmask8)(M)))
13588
13589#endif
13590
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped:
   B is passed first and A second.  */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps((B), (A))
13592
/* Undo the push_options / target("avx512vl") performed near the top of
   this header, which happens only when __AVX512VL__ was not already
   defined (signalled by __DISABLE_AVX512VL__).  */
#if defined (__DISABLE_AVX512VL__)
#undef __DISABLE_AVX512VL__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VL__ */
13597
13598#endif /* _AVX512VLINTRIN_H_INCLUDED */