]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/avx512vlintrin.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / avx512vlintrin.h
1 /* Copyright (C) 2014-2016 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512VLINTRIN_H_INCLUDED
29 #define _AVX512VLINTRIN_H_INCLUDED
30
31 /* Doesn't require avx512vl target and is used in avx512dqintrin.h. */
32 extern __inline __m128i
33 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
34 _mm_setzero_di (void)
35 {
36 return __extension__ (__m128i)(__v2di){ 0LL, 0LL};
37 }
38
/* When __AVX512VL__ is not already defined, save the current target
   options and enable the avx512vl target for what follows;
   __DISABLE_AVX512VL__ records that this was done.
   NOTE(review): the matching pop_options is expected later in the file,
   outside this excerpt -- confirm.  */
#if !defined (__AVX512VL__)
# pragma GCC push_options
# pragma GCC target("avx512vl")
# define __DISABLE_AVX512VL__
#endif /* !__AVX512VL__ */
44
/* Internal data type for implementing the intrinsics: __mmask32 is an
   alias for unsigned int.  */
typedef unsigned int __mmask32;
47
48 extern __inline __m256d
49 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
50 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
51 {
52 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
53 (__v4df) __W,
54 (__mmask8) __U);
55 }
56
57 extern __inline __m256d
58 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
59 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
60 {
61 return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
62 (__v4df)
63 _mm256_setzero_pd (),
64 (__mmask8) __U);
65 }
66
67 extern __inline __m128d
68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
69 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
70 {
71 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
72 (__v2df) __W,
73 (__mmask8) __U);
74 }
75
76 extern __inline __m128d
77 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
78 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
79 {
80 return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
81 (__v2df)
82 _mm_setzero_pd (),
83 (__mmask8) __U);
84 }
85
86 extern __inline __m256d
87 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
88 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
89 {
90 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
91 (__v4df) __W,
92 (__mmask8) __U);
93 }
94
95 extern __inline __m256d
96 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
97 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
98 {
99 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
100 (__v4df)
101 _mm256_setzero_pd (),
102 (__mmask8) __U);
103 }
104
105 extern __inline __m128d
106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
107 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
108 {
109 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
110 (__v2df) __W,
111 (__mmask8) __U);
112 }
113
114 extern __inline __m128d
115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
116 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
117 {
118 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
119 (__v2df)
120 _mm_setzero_pd (),
121 (__mmask8) __U);
122 }
123
124 extern __inline void
125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
126 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
127 {
128 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
129 (__v4df) __A,
130 (__mmask8) __U);
131 }
132
133 extern __inline void
134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
135 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
136 {
137 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
138 (__v2df) __A,
139 (__mmask8) __U);
140 }
141
142 extern __inline __m256
143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
144 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
145 {
146 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
147 (__v8sf) __W,
148 (__mmask8) __U);
149 }
150
151 extern __inline __m256
152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
153 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
154 {
155 return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
156 (__v8sf)
157 _mm256_setzero_ps (),
158 (__mmask8) __U);
159 }
160
161 extern __inline __m128
162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
163 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
164 {
165 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
166 (__v4sf) __W,
167 (__mmask8) __U);
168 }
169
170 extern __inline __m128
171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
172 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
173 {
174 return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
175 (__v4sf)
176 _mm_setzero_ps (),
177 (__mmask8) __U);
178 }
179
180 extern __inline __m256
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
183 {
184 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
185 (__v8sf) __W,
186 (__mmask8) __U);
187 }
188
189 extern __inline __m256
190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
191 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
192 {
193 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
194 (__v8sf)
195 _mm256_setzero_ps (),
196 (__mmask8) __U);
197 }
198
199 extern __inline __m128
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
202 {
203 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
204 (__v4sf) __W,
205 (__mmask8) __U);
206 }
207
208 extern __inline __m128
209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
211 {
212 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
213 (__v4sf)
214 _mm_setzero_ps (),
215 (__mmask8) __U);
216 }
217
218 extern __inline void
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
221 {
222 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
223 (__v8sf) __A,
224 (__mmask8) __U);
225 }
226
227 extern __inline void
228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
230 {
231 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
232 (__v4sf) __A,
233 (__mmask8) __U);
234 }
235
236 extern __inline __m256i
237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
239 {
240 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
241 (__v4di) __W,
242 (__mmask8) __U);
243 }
244
245 extern __inline __m256i
246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
248 {
249 return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
250 (__v4di)
251 _mm256_setzero_si256 (),
252 (__mmask8) __U);
253 }
254
255 extern __inline __m128i
256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
257 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
258 {
259 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
260 (__v2di) __W,
261 (__mmask8) __U);
262 }
263
264 extern __inline __m128i
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
267 {
268 return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
269 (__v2di)
270 _mm_setzero_di (),
271 (__mmask8) __U);
272 }
273
274 extern __inline __m256i
275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
276 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
277 {
278 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
279 (__v4di) __W,
280 (__mmask8)
281 __U);
282 }
283
284 extern __inline __m256i
285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
286 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
287 {
288 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
289 (__v4di)
290 _mm256_setzero_si256 (),
291 (__mmask8)
292 __U);
293 }
294
295 extern __inline __m128i
296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
297 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
298 {
299 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
300 (__v2di) __W,
301 (__mmask8)
302 __U);
303 }
304
305 extern __inline __m128i
306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
307 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
308 {
309 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
310 (__v2di)
311 _mm_setzero_di (),
312 (__mmask8)
313 __U);
314 }
315
316 extern __inline void
317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
319 {
320 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
321 (__v4di) __A,
322 (__mmask8) __U);
323 }
324
325 extern __inline void
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
328 {
329 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
330 (__v2di) __A,
331 (__mmask8) __U);
332 }
333
334 extern __inline __m256i
335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
336 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
337 {
338 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
339 (__v8si) __W,
340 (__mmask8) __U);
341 }
342
343 extern __inline __m256i
344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
346 {
347 return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
348 (__v8si)
349 _mm256_setzero_si256 (),
350 (__mmask8) __U);
351 }
352
353 extern __inline __m128i
354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
355 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
356 {
357 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
358 (__v4si) __W,
359 (__mmask8) __U);
360 }
361
362 extern __inline __m128i
363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
364 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
365 {
366 return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
367 (__v4si)
368 _mm_setzero_si128 (),
369 (__mmask8) __U);
370 }
371
372 extern __inline __m256i
373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
374 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
375 {
376 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
377 (__v8si) __W,
378 (__mmask8)
379 __U);
380 }
381
382 extern __inline __m256i
383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
384 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
385 {
386 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
387 (__v8si)
388 _mm256_setzero_si256 (),
389 (__mmask8)
390 __U);
391 }
392
393 extern __inline __m128i
394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
396 {
397 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
398 (__v4si) __W,
399 (__mmask8)
400 __U);
401 }
402
403 extern __inline __m128i
404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
406 {
407 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
408 (__v4si)
409 _mm_setzero_si128 (),
410 (__mmask8)
411 __U);
412 }
413
414 extern __inline void
415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
416 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
417 {
418 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
419 (__v8si) __A,
420 (__mmask8) __U);
421 }
422
423 extern __inline void
424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
425 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
426 {
427 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
428 (__v4si) __A,
429 (__mmask8) __U);
430 }
431
432 extern __inline __m128i
433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
434 _mm_setzero_hi (void)
435 {
436 return __extension__ (__m128i) (__v8hi)
437 {
438 0, 0, 0, 0, 0, 0, 0, 0};
439 }
440
441 extern __inline __m128d
442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
443 _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
444 {
445 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
446 (__v2df) __B,
447 (__v2df) __W,
448 (__mmask8) __U);
449 }
450
451 extern __inline __m128d
452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
453 _mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
454 {
455 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
456 (__v2df) __B,
457 (__v2df)
458 _mm_setzero_pd (),
459 (__mmask8) __U);
460 }
461
462 extern __inline __m256d
463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
464 _mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
465 __m256d __B)
466 {
467 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
468 (__v4df) __B,
469 (__v4df) __W,
470 (__mmask8) __U);
471 }
472
473 extern __inline __m256d
474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475 _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
476 {
477 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
478 (__v4df) __B,
479 (__v4df)
480 _mm256_setzero_pd (),
481 (__mmask8) __U);
482 }
483
484 extern __inline __m128
485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486 _mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
487 {
488 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
489 (__v4sf) __B,
490 (__v4sf) __W,
491 (__mmask8) __U);
492 }
493
494 extern __inline __m128
495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
496 _mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
497 {
498 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
499 (__v4sf) __B,
500 (__v4sf)
501 _mm_setzero_ps (),
502 (__mmask8) __U);
503 }
504
505 extern __inline __m256
506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
507 _mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
508 {
509 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
510 (__v8sf) __B,
511 (__v8sf) __W,
512 (__mmask8) __U);
513 }
514
515 extern __inline __m256
516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
517 _mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
518 {
519 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
520 (__v8sf) __B,
521 (__v8sf)
522 _mm256_setzero_ps (),
523 (__mmask8) __U);
524 }
525
526 extern __inline __m128d
527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
528 _mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
529 {
530 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
531 (__v2df) __B,
532 (__v2df) __W,
533 (__mmask8) __U);
534 }
535
536 extern __inline __m128d
537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538 _mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
539 {
540 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
541 (__v2df) __B,
542 (__v2df)
543 _mm_setzero_pd (),
544 (__mmask8) __U);
545 }
546
547 extern __inline __m256d
548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
549 _mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
550 __m256d __B)
551 {
552 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
553 (__v4df) __B,
554 (__v4df) __W,
555 (__mmask8) __U);
556 }
557
558 extern __inline __m256d
559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
560 _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
561 {
562 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
563 (__v4df) __B,
564 (__v4df)
565 _mm256_setzero_pd (),
566 (__mmask8) __U);
567 }
568
569 extern __inline __m128
570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
571 _mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
572 {
573 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
574 (__v4sf) __B,
575 (__v4sf) __W,
576 (__mmask8) __U);
577 }
578
579 extern __inline __m128
580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
581 _mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
582 {
583 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
584 (__v4sf) __B,
585 (__v4sf)
586 _mm_setzero_ps (),
587 (__mmask8) __U);
588 }
589
590 extern __inline __m256
591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
592 _mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
593 {
594 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
595 (__v8sf) __B,
596 (__v8sf) __W,
597 (__mmask8) __U);
598 }
599
600 extern __inline __m256
601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
602 _mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
603 {
604 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
605 (__v8sf) __B,
606 (__v8sf)
607 _mm256_setzero_ps (),
608 (__mmask8) __U);
609 }
610
611 extern __inline void
612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
613 _mm256_store_epi64 (void *__P, __m256i __A)
614 {
615 *(__m256i *) __P = __A;
616 }
617
618 extern __inline void
619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
620 _mm_store_epi64 (void *__P, __m128i __A)
621 {
622 *(__m128i *) __P = __A;
623 }
624
625 extern __inline __m256d
626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
628 {
629 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
630 (__v4df) __W,
631 (__mmask8) __U);
632 }
633
634 extern __inline __m256d
635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
636 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
637 {
638 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
639 (__v4df)
640 _mm256_setzero_pd (),
641 (__mmask8) __U);
642 }
643
644 extern __inline __m128d
645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
646 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
647 {
648 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
649 (__v2df) __W,
650 (__mmask8) __U);
651 }
652
653 extern __inline __m128d
654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
655 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
656 {
657 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
658 (__v2df)
659 _mm_setzero_pd (),
660 (__mmask8) __U);
661 }
662
663 extern __inline void
664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
665 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
666 {
667 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
668 (__v4df) __A,
669 (__mmask8) __U);
670 }
671
672 extern __inline void
673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
674 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
675 {
676 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
677 (__v2df) __A,
678 (__mmask8) __U);
679 }
680
681 extern __inline __m256
682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
683 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
684 {
685 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
686 (__v8sf) __W,
687 (__mmask8) __U);
688 }
689
690 extern __inline __m256
691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
692 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
693 {
694 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
695 (__v8sf)
696 _mm256_setzero_ps (),
697 (__mmask8) __U);
698 }
699
700 extern __inline __m128
701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
702 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
703 {
704 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
705 (__v4sf) __W,
706 (__mmask8) __U);
707 }
708
709 extern __inline __m128
710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
711 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
712 {
713 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
714 (__v4sf)
715 _mm_setzero_ps (),
716 (__mmask8) __U);
717 }
718
719 extern __inline void
720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
721 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
722 {
723 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
724 (__v8sf) __A,
725 (__mmask8) __U);
726 }
727
728 extern __inline void
729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
730 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
731 {
732 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
733 (__v4sf) __A,
734 (__mmask8) __U);
735 }
736
737 extern __inline __m256i
738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
739 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
740 {
741 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
742 (__v4di) __W,
743 (__mmask8) __U);
744 }
745
746 extern __inline __m256i
747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
748 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
749 {
750 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
751 (__v4di)
752 _mm256_setzero_si256 (),
753 (__mmask8) __U);
754 }
755
756 extern __inline __m128i
757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
758 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
759 {
760 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
761 (__v2di) __W,
762 (__mmask8) __U);
763 }
764
765 extern __inline __m128i
766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
767 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
768 {
769 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
770 (__v2di)
771 _mm_setzero_di (),
772 (__mmask8) __U);
773 }
774
775 extern __inline void
776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
777 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
778 {
779 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
780 (__v4di) __A,
781 (__mmask8) __U);
782 }
783
784 extern __inline void
785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
786 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
787 {
788 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
789 (__v2di) __A,
790 (__mmask8) __U);
791 }
792
793 extern __inline __m256i
794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
796 {
797 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
798 (__v8si) __W,
799 (__mmask8) __U);
800 }
801
802 extern __inline __m256i
803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
804 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
805 {
806 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
807 (__v8si)
808 _mm256_setzero_si256 (),
809 (__mmask8) __U);
810 }
811
812 extern __inline __m128i
813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
814 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
815 {
816 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
817 (__v4si) __W,
818 (__mmask8) __U);
819 }
820
821 extern __inline __m128i
822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
824 {
825 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
826 (__v4si)
827 _mm_setzero_si128 (),
828 (__mmask8) __U);
829 }
830
831 extern __inline void
832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
833 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
834 {
835 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
836 (__v8si) __A,
837 (__mmask8) __U);
838 }
839
840 extern __inline void
841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
842 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
843 {
844 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
845 (__v4si) __A,
846 (__mmask8) __U);
847 }
848
849 extern __inline __m256i
850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
851 _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
852 {
853 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
854 (__v8si) __W,
855 (__mmask8) __U);
856 }
857
858 extern __inline __m256i
859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
860 _mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
861 {
862 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
863 (__v8si)
864 _mm256_setzero_si256 (),
865 (__mmask8) __U);
866 }
867
868 extern __inline __m128i
869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
870 _mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
871 {
872 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
873 (__v4si) __W,
874 (__mmask8) __U);
875 }
876
877 extern __inline __m128i
878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
879 _mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
880 {
881 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
882 (__v4si)
883 _mm_setzero_si128 (),
884 (__mmask8) __U);
885 }
886
887 extern __inline __m256i
888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
889 _mm256_abs_epi64 (__m256i __A)
890 {
891 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
892 (__v4di)
893 _mm256_setzero_si256 (),
894 (__mmask8) -1);
895 }
896
897 extern __inline __m256i
898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
899 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
900 {
901 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
902 (__v4di) __W,
903 (__mmask8) __U);
904 }
905
906 extern __inline __m256i
907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
908 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
909 {
910 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
911 (__v4di)
912 _mm256_setzero_si256 (),
913 (__mmask8) __U);
914 }
915
916 extern __inline __m128i
917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
918 _mm_abs_epi64 (__m128i __A)
919 {
920 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
921 (__v2di)
922 _mm_setzero_di (),
923 (__mmask8) -1);
924 }
925
926 extern __inline __m128i
927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
928 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
929 {
930 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
931 (__v2di) __W,
932 (__mmask8) __U);
933 }
934
935 extern __inline __m128i
936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
937 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
938 {
939 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
940 (__v2di)
941 _mm_setzero_di (),
942 (__mmask8) __U);
943 }
944
945 extern __inline __m128i
946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
947 _mm256_cvtpd_epu32 (__m256d __A)
948 {
949 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
950 (__v4si)
951 _mm_setzero_si128 (),
952 (__mmask8) -1);
953 }
954
955 extern __inline __m128i
956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
957 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
958 {
959 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
960 (__v4si) __W,
961 (__mmask8) __U);
962 }
963
964 extern __inline __m128i
965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
966 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
967 {
968 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
969 (__v4si)
970 _mm_setzero_si128 (),
971 (__mmask8) __U);
972 }
973
974 extern __inline __m128i
975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
976 _mm_cvtpd_epu32 (__m128d __A)
977 {
978 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
979 (__v4si)
980 _mm_setzero_si128 (),
981 (__mmask8) -1);
982 }
983
984 extern __inline __m128i
985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
986 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
987 {
988 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
989 (__v4si) __W,
990 (__mmask8) __U);
991 }
992
993 extern __inline __m128i
994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
995 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
996 {
997 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
998 (__v4si)
999 _mm_setzero_si128 (),
1000 (__mmask8) __U);
1001 }
1002
1003 extern __inline __m256i
1004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1005 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
1006 {
1007 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1008 (__v8si) __W,
1009 (__mmask8) __U);
1010 }
1011
1012 extern __inline __m256i
1013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1014 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
1015 {
1016 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1017 (__v8si)
1018 _mm256_setzero_si256 (),
1019 (__mmask8) __U);
1020 }
1021
1022 extern __inline __m128i
1023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1024 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1025 {
1026 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1027 (__v4si) __W,
1028 (__mmask8) __U);
1029 }
1030
1031 extern __inline __m128i
1032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1033 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1034 {
1035 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1036 (__v4si)
1037 _mm_setzero_si128 (),
1038 (__mmask8) __U);
1039 }
1040
1041 extern __inline __m256i
1042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1043 _mm256_cvttps_epu32 (__m256 __A)
1044 {
1045 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1046 (__v8si)
1047 _mm256_setzero_si256 (),
1048 (__mmask8) -1);
1049 }
1050
1051 extern __inline __m256i
1052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1053 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1054 {
1055 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1056 (__v8si) __W,
1057 (__mmask8) __U);
1058 }
1059
1060 extern __inline __m256i
1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1062 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1063 {
1064 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1065 (__v8si)
1066 _mm256_setzero_si256 (),
1067 (__mmask8) __U);
1068 }
1069
1070 extern __inline __m128i
1071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072 _mm_cvttps_epu32 (__m128 __A)
1073 {
1074 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1075 (__v4si)
1076 _mm_setzero_si128 (),
1077 (__mmask8) -1);
1078 }
1079
1080 extern __inline __m128i
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1083 {
1084 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1085 (__v4si) __W,
1086 (__mmask8) __U);
1087 }
1088
1089 extern __inline __m128i
1090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1091 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1092 {
1093 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1094 (__v4si)
1095 _mm_setzero_si128 (),
1096 (__mmask8) __U);
1097 }
1098
1099 extern __inline __m128i
1100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1101 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1102 {
1103 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1104 (__v4si) __W,
1105 (__mmask8) __U);
1106 }
1107
1108 extern __inline __m128i
1109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1110 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1111 {
1112 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1113 (__v4si)
1114 _mm_setzero_si128 (),
1115 (__mmask8) __U);
1116 }
1117
1118 extern __inline __m128i
1119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1120 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1121 {
1122 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1123 (__v4si) __W,
1124 (__mmask8) __U);
1125 }
1126
1127 extern __inline __m128i
1128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1130 {
1131 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1132 (__v4si)
1133 _mm_setzero_si128 (),
1134 (__mmask8) __U);
1135 }
1136
1137 extern __inline __m128i
1138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm256_cvttpd_epu32 (__m256d __A)
1140 {
1141 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1142 (__v4si)
1143 _mm_setzero_si128 (),
1144 (__mmask8) -1);
1145 }
1146
1147 extern __inline __m128i
1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1149 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1150 {
1151 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1152 (__v4si) __W,
1153 (__mmask8) __U);
1154 }
1155
1156 extern __inline __m128i
1157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1158 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1159 {
1160 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1161 (__v4si)
1162 _mm_setzero_si128 (),
1163 (__mmask8) __U);
1164 }
1165
1166 extern __inline __m128i
1167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168 _mm_cvttpd_epu32 (__m128d __A)
1169 {
1170 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1171 (__v4si)
1172 _mm_setzero_si128 (),
1173 (__mmask8) -1);
1174 }
1175
1176 extern __inline __m128i
1177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1178 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1179 {
1180 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1181 (__v4si) __W,
1182 (__mmask8) __U);
1183 }
1184
1185 extern __inline __m128i
1186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1187 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1188 {
1189 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1190 (__v4si)
1191 _mm_setzero_si128 (),
1192 (__mmask8) __U);
1193 }
1194
1195 extern __inline __m128i
1196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1198 {
1199 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1200 (__v4si) __W,
1201 (__mmask8) __U);
1202 }
1203
1204 extern __inline __m128i
1205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1206 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1207 {
1208 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1209 (__v4si)
1210 _mm_setzero_si128 (),
1211 (__mmask8) __U);
1212 }
1213
1214 extern __inline __m128i
1215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1216 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1217 {
1218 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1219 (__v4si) __W,
1220 (__mmask8) __U);
1221 }
1222
1223 extern __inline __m128i
1224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1225 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1226 {
1227 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1228 (__v4si)
1229 _mm_setzero_si128 (),
1230 (__mmask8) __U);
1231 }
1232
1233 extern __inline __m256d
1234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1235 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1236 {
1237 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1238 (__v4df) __W,
1239 (__mmask8) __U);
1240 }
1241
1242 extern __inline __m256d
1243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1244 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1245 {
1246 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1247 (__v4df)
1248 _mm256_setzero_pd (),
1249 (__mmask8) __U);
1250 }
1251
1252 extern __inline __m128d
1253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1254 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1255 {
1256 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1257 (__v2df) __W,
1258 (__mmask8) __U);
1259 }
1260
1261 extern __inline __m128d
1262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1263 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1264 {
1265 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1266 (__v2df)
1267 _mm_setzero_pd (),
1268 (__mmask8) __U);
1269 }
1270
1271 extern __inline __m256d
1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm256_cvtepu32_pd (__m128i __A)
1274 {
1275 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1276 (__v4df)
1277 _mm256_setzero_pd (),
1278 (__mmask8) -1);
1279 }
1280
1281 extern __inline __m256d
1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1283 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1284 {
1285 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1286 (__v4df) __W,
1287 (__mmask8) __U);
1288 }
1289
1290 extern __inline __m256d
1291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1292 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1293 {
1294 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1295 (__v4df)
1296 _mm256_setzero_pd (),
1297 (__mmask8) __U);
1298 }
1299
1300 extern __inline __m128d
1301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1302 _mm_cvtepu32_pd (__m128i __A)
1303 {
1304 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1305 (__v2df)
1306 _mm_setzero_pd (),
1307 (__mmask8) -1);
1308 }
1309
1310 extern __inline __m128d
1311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1312 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1313 {
1314 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1315 (__v2df) __W,
1316 (__mmask8) __U);
1317 }
1318
1319 extern __inline __m128d
1320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1321 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1322 {
1323 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1324 (__v2df)
1325 _mm_setzero_pd (),
1326 (__mmask8) __U);
1327 }
1328
1329 extern __inline __m256
1330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1331 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1332 {
1333 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1334 (__v8sf) __W,
1335 (__mmask8) __U);
1336 }
1337
1338 extern __inline __m256
1339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1340 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
1341 {
1342 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1343 (__v8sf)
1344 _mm256_setzero_ps (),
1345 (__mmask8) __U);
1346 }
1347
1348 extern __inline __m128
1349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1350 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1351 {
1352 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1353 (__v4sf) __W,
1354 (__mmask8) __U);
1355 }
1356
1357 extern __inline __m128
1358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1359 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
1360 {
1361 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1362 (__v4sf)
1363 _mm_setzero_ps (),
1364 (__mmask8) __U);
1365 }
1366
1367 extern __inline __m256
1368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1369 _mm256_cvtepu32_ps (__m256i __A)
1370 {
1371 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1372 (__v8sf)
1373 _mm256_setzero_ps (),
1374 (__mmask8) -1);
1375 }
1376
1377 extern __inline __m256
1378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1379 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1380 {
1381 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1382 (__v8sf) __W,
1383 (__mmask8) __U);
1384 }
1385
1386 extern __inline __m256
1387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1389 {
1390 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1391 (__v8sf)
1392 _mm256_setzero_ps (),
1393 (__mmask8) __U);
1394 }
1395
1396 extern __inline __m128
1397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1398 _mm_cvtepu32_ps (__m128i __A)
1399 {
1400 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1401 (__v4sf)
1402 _mm_setzero_ps (),
1403 (__mmask8) -1);
1404 }
1405
1406 extern __inline __m128
1407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1408 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1409 {
1410 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1411 (__v4sf) __W,
1412 (__mmask8) __U);
1413 }
1414
1415 extern __inline __m128
1416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1417 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1418 {
1419 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1420 (__v4sf)
1421 _mm_setzero_ps (),
1422 (__mmask8) __U);
1423 }
1424
1425 extern __inline __m256d
1426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1427 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1428 {
1429 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1430 (__v4df) __W,
1431 (__mmask8) __U);
1432 }
1433
1434 extern __inline __m256d
1435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1436 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1437 {
1438 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1439 (__v4df)
1440 _mm256_setzero_pd (),
1441 (__mmask8) __U);
1442 }
1443
1444 extern __inline __m128d
1445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1446 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1447 {
1448 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1449 (__v2df) __W,
1450 (__mmask8) __U);
1451 }
1452
1453 extern __inline __m128d
1454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1455 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1456 {
1457 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1458 (__v2df)
1459 _mm_setzero_pd (),
1460 (__mmask8) __U);
1461 }
1462
1463 extern __inline __m128i
1464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465 _mm_cvtepi32_epi8 (__m128i __A)
1466 {
1467 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468 (__v16qi)_mm_undefined_si128(),
1469 (__mmask8) -1);
1470 }
1471
1472 extern __inline void
1473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1474 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1475 {
1476 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1477 }
1478
1479 extern __inline __m128i
1480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1482 {
1483 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1484 (__v16qi) __O, __M);
1485 }
1486
1487 extern __inline __m128i
1488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1489 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1490 {
1491 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1492 (__v16qi)
1493 _mm_setzero_si128 (),
1494 __M);
1495 }
1496
1497 extern __inline __m128i
1498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1499 _mm256_cvtepi32_epi8 (__m256i __A)
1500 {
1501 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1502 (__v16qi)_mm_undefined_si128(),
1503 (__mmask8) -1);
1504 }
1505
1506 extern __inline __m128i
1507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1509 {
1510 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511 (__v16qi) __O, __M);
1512 }
1513
1514 extern __inline void
1515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1516 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1517 {
1518 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1519 }
1520
1521 extern __inline __m128i
1522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1523 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1524 {
1525 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1526 (__v16qi)
1527 _mm_setzero_si128 (),
1528 __M);
1529 }
1530
1531 extern __inline __m128i
1532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1533 _mm_cvtsepi32_epi8 (__m128i __A)
1534 {
1535 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1536 (__v16qi)_mm_undefined_si128(),
1537 (__mmask8) -1);
1538 }
1539
1540 extern __inline void
1541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1543 {
1544 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1545 }
1546
1547 extern __inline __m128i
1548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1550 {
1551 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1552 (__v16qi) __O, __M);
1553 }
1554
1555 extern __inline __m128i
1556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1557 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1558 {
1559 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1560 (__v16qi)
1561 _mm_setzero_si128 (),
1562 __M);
1563 }
1564
1565 extern __inline __m128i
1566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1567 _mm256_cvtsepi32_epi8 (__m256i __A)
1568 {
1569 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1570 (__v16qi)_mm_undefined_si128(),
1571 (__mmask8) -1);
1572 }
1573
1574 extern __inline void
1575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1576 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1577 {
1578 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1579 }
1580
1581 extern __inline __m128i
1582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1583 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1584 {
1585 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1586 (__v16qi) __O, __M);
1587 }
1588
1589 extern __inline __m128i
1590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1591 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1592 {
1593 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1594 (__v16qi)
1595 _mm_setzero_si128 (),
1596 __M);
1597 }
1598
1599 extern __inline __m128i
1600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1601 _mm_cvtusepi32_epi8 (__m128i __A)
1602 {
1603 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1604 (__v16qi)_mm_undefined_si128(),
1605 (__mmask8) -1);
1606 }
1607
1608 extern __inline void
1609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1611 {
1612 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1613 }
1614
1615 extern __inline __m128i
1616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1617 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1618 {
1619 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1620 (__v16qi) __O,
1621 __M);
1622 }
1623
1624 extern __inline __m128i
1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1626 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1627 {
1628 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1629 (__v16qi)
1630 _mm_setzero_si128 (),
1631 __M);
1632 }
1633
1634 extern __inline __m128i
1635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1636 _mm256_cvtusepi32_epi8 (__m256i __A)
1637 {
1638 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1639 (__v16qi)_mm_undefined_si128(),
1640 (__mmask8) -1);
1641 }
1642
1643 extern __inline void
1644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1645 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1646 {
1647 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1648 }
1649
1650 extern __inline __m128i
1651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1652 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1653 {
1654 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1655 (__v16qi) __O,
1656 __M);
1657 }
1658
1659 extern __inline __m128i
1660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1661 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1662 {
1663 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1664 (__v16qi)
1665 _mm_setzero_si128 (),
1666 __M);
1667 }
1668
1669 extern __inline __m128i
1670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1671 _mm_cvtepi32_epi16 (__m128i __A)
1672 {
1673 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1674 (__v8hi) _mm_setzero_si128 (),
1675 (__mmask8) -1);
1676 }
1677
1678 extern __inline void
1679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1680 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1681 {
1682 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1683 }
1684
1685 extern __inline __m128i
1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1687 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1688 {
1689 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1690 (__v8hi) __O, __M);
1691 }
1692
1693 extern __inline __m128i
1694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1696 {
1697 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1698 (__v8hi)
1699 _mm_setzero_si128 (),
1700 __M);
1701 }
1702
1703 extern __inline __m128i
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm256_cvtepi32_epi16 (__m256i __A)
1706 {
1707 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1708 (__v8hi)_mm_setzero_si128 (),
1709 (__mmask8) -1);
1710 }
1711
1712 extern __inline void
1713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1714 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1715 {
1716 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1717 }
1718
1719 extern __inline __m128i
1720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1721 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1722 {
1723 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1724 (__v8hi) __O, __M);
1725 }
1726
1727 extern __inline __m128i
1728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1729 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1730 {
1731 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1732 (__v8hi)
1733 _mm_setzero_si128 (),
1734 __M);
1735 }
1736
1737 extern __inline __m128i
1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1739 _mm_cvtsepi32_epi16 (__m128i __A)
1740 {
1741 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1742 (__v8hi)_mm_setzero_si128 (),
1743 (__mmask8) -1);
1744 }
1745
1746 extern __inline void
1747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1748 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1749 {
1750 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1751 }
1752
1753 extern __inline __m128i
1754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1755 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1756 {
1757 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1758 (__v8hi)__O,
1759 __M);
1760 }
1761
1762 extern __inline __m128i
1763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1764 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1765 {
1766 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1767 (__v8hi)
1768 _mm_setzero_si128 (),
1769 __M);
1770 }
1771
1772 extern __inline __m128i
1773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1774 _mm256_cvtsepi32_epi16 (__m256i __A)
1775 {
1776 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1777 (__v8hi)_mm_undefined_si128(),
1778 (__mmask8) -1);
1779 }
1780
1781 extern __inline void
1782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1784 {
1785 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1786 }
1787
1788 extern __inline __m128i
1789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1790 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1791 {
1792 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1793 (__v8hi) __O, __M);
1794 }
1795
1796 extern __inline __m128i
1797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1798 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1799 {
1800 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1801 (__v8hi)
1802 _mm_setzero_si128 (),
1803 __M);
1804 }
1805
1806 extern __inline __m128i
1807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1808 _mm_cvtusepi32_epi16 (__m128i __A)
1809 {
1810 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1811 (__v8hi)_mm_undefined_si128(),
1812 (__mmask8) -1);
1813 }
1814
1815 extern __inline void
1816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1817 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1818 {
1819 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1820 }
1821
1822 extern __inline __m128i
1823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1824 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1825 {
1826 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1827 (__v8hi) __O, __M);
1828 }
1829
1830 extern __inline __m128i
1831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1832 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1833 {
1834 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1835 (__v8hi)
1836 _mm_setzero_si128 (),
1837 __M);
1838 }
1839
1840 extern __inline __m128i
1841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1842 _mm256_cvtusepi32_epi16 (__m256i __A)
1843 {
1844 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1845 (__v8hi)_mm_undefined_si128(),
1846 (__mmask8) -1);
1847 }
1848
1849 extern __inline void
1850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1851 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1852 {
1853 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1854 }
1855
1856 extern __inline __m128i
1857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1858 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1859 {
1860 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1861 (__v8hi) __O, __M);
1862 }
1863
1864 extern __inline __m128i
1865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1866 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1867 {
1868 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1869 (__v8hi)
1870 _mm_setzero_si128 (),
1871 __M);
1872 }
1873
1874 extern __inline __m128i
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm_cvtepi64_epi8 (__m128i __A)
1877 {
1878 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1879 (__v16qi)_mm_undefined_si128(),
1880 (__mmask8) -1);
1881 }
1882
1883 extern __inline void
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1886 {
1887 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1888 }
1889
1890 extern __inline __m128i
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1893 {
1894 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1895 (__v16qi) __O, __M);
1896 }
1897
1898 extern __inline __m128i
1899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1900 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1901 {
1902 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1903 (__v16qi)
1904 _mm_setzero_si128 (),
1905 __M);
1906 }
1907
1908 extern __inline __m128i
1909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1910 _mm256_cvtepi64_epi8 (__m256i __A)
1911 {
1912 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1913 (__v16qi)_mm_undefined_si128(),
1914 (__mmask8) -1);
1915 }
1916
1917 extern __inline void
1918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1919 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1920 {
1921 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1922 }
1923
1924 extern __inline __m128i
1925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1926 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1927 {
1928 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1929 (__v16qi) __O, __M);
1930 }
1931
1932 extern __inline __m128i
1933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1935 {
1936 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1937 (__v16qi)
1938 _mm_setzero_si128 (),
1939 __M);
1940 }
1941
1942 extern __inline __m128i
1943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1944 _mm_cvtsepi64_epi8 (__m128i __A)
1945 {
1946 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1947 (__v16qi)_mm_undefined_si128(),
1948 (__mmask8) -1);
1949 }
1950
1951 extern __inline void
1952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1953 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1954 {
1955 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1956 }
1957
1958 extern __inline __m128i
1959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1960 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1961 {
1962 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1963 (__v16qi) __O, __M);
1964 }
1965
1966 extern __inline __m128i
1967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1969 {
1970 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1971 (__v16qi)
1972 _mm_setzero_si128 (),
1973 __M);
1974 }
1975
1976 extern __inline __m128i
1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978 _mm256_cvtsepi64_epi8 (__m256i __A)
1979 {
1980 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1981 (__v16qi)_mm_undefined_si128(),
1982 (__mmask8) -1);
1983 }
1984
1985 extern __inline void
1986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1987 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1988 {
1989 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1990 }
1991
1992 extern __inline __m128i
1993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1994 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1995 {
1996 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1997 (__v16qi) __O, __M);
1998 }
1999
2000 extern __inline __m128i
2001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2002 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2003 {
2004 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2005 (__v16qi)
2006 _mm_setzero_si128 (),
2007 __M);
2008 }
2009
2010 extern __inline __m128i
2011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2012 _mm_cvtusepi64_epi8 (__m128i __A)
2013 {
2014 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2015 (__v16qi)_mm_undefined_si128(),
2016 (__mmask8) -1);
2017 }
2018
2019 extern __inline void
2020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2022 {
2023 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2024 }
2025
2026 extern __inline __m128i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2029 {
2030 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031 (__v16qi) __O,
2032 __M);
2033 }
2034
2035 extern __inline __m128i
2036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2038 {
2039 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040 (__v16qi)
2041 _mm_setzero_si128 (),
2042 __M);
2043 }
2044
2045 extern __inline __m128i
2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 _mm256_cvtusepi64_epi8 (__m256i __A)
2048 {
2049 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2050 (__v16qi)_mm_undefined_si128(),
2051 (__mmask8) -1);
2052 }
2053
2054 extern __inline void
2055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2056 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2057 {
2058 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2059 }
2060
2061 extern __inline __m128i
2062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2063 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2064 {
2065 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2066 (__v16qi) __O,
2067 __M);
2068 }
2069
2070 extern __inline __m128i
2071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2073 {
2074 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2075 (__v16qi)
2076 _mm_setzero_si128 (),
2077 __M);
2078 }
2079
2080 extern __inline __m128i
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm_cvtepi64_epi16 (__m128i __A)
2083 {
2084 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2085 (__v8hi)_mm_undefined_si128(),
2086 (__mmask8) -1);
2087 }
2088
2089 extern __inline void
2090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2091 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2092 {
2093 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2094 }
2095
2096 extern __inline __m128i
2097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2099 {
2100 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2101 (__v8hi)__O,
2102 __M);
2103 }
2104
2105 extern __inline __m128i
2106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2107 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2108 {
2109 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2110 (__v8hi)
2111 _mm_setzero_si128 (),
2112 __M);
2113 }
2114
2115 extern __inline __m128i
2116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2117 _mm256_cvtepi64_epi16 (__m256i __A)
2118 {
2119 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2120 (__v8hi)_mm_undefined_si128(),
2121 (__mmask8) -1);
2122 }
2123
2124 extern __inline void
2125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2126 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2127 {
2128 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2129 }
2130
2131 extern __inline __m128i
2132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2133 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2134 {
2135 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2136 (__v8hi) __O, __M);
2137 }
2138
2139 extern __inline __m128i
2140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2141 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2142 {
2143 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2144 (__v8hi)
2145 _mm_setzero_si128 (),
2146 __M);
2147 }
2148
2149 extern __inline __m128i
2150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2151 _mm_cvtsepi64_epi16 (__m128i __A)
2152 {
2153 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2154 (__v8hi)_mm_undefined_si128(),
2155 (__mmask8) -1);
2156 }
2157
2158 extern __inline void
2159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2161 {
2162 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2163 }
2164
2165 extern __inline __m128i
2166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2167 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2168 {
2169 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2170 (__v8hi) __O, __M);
2171 }
2172
2173 extern __inline __m128i
2174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2175 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2176 {
2177 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2178 (__v8hi)
2179 _mm_setzero_si128 (),
2180 __M);
2181 }
2182
2183 extern __inline __m128i
2184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2185 _mm256_cvtsepi64_epi16 (__m256i __A)
2186 {
2187 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2188 (__v8hi)_mm_undefined_si128(),
2189 (__mmask8) -1);
2190 }
2191
2192 extern __inline void
2193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2195 {
2196 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2197 }
2198
2199 extern __inline __m128i
2200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2201 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2202 {
2203 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2204 (__v8hi) __O, __M);
2205 }
2206
2207 extern __inline __m128i
2208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2209 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2210 {
2211 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2212 (__v8hi)
2213 _mm_setzero_si128 (),
2214 __M);
2215 }
2216
2217 extern __inline __m128i
2218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2219 _mm_cvtusepi64_epi16 (__m128i __A)
2220 {
2221 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2222 (__v8hi)_mm_undefined_si128(),
2223 (__mmask8) -1);
2224 }
2225
2226 extern __inline void
2227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2228 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2229 {
2230 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2231 }
2232
2233 extern __inline __m128i
2234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2236 {
2237 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2238 (__v8hi) __O, __M);
2239 }
2240
2241 extern __inline __m128i
2242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2243 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2244 {
2245 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2246 (__v8hi)
2247 _mm_setzero_si128 (),
2248 __M);
2249 }
2250
2251 extern __inline __m128i
2252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2253 _mm256_cvtusepi64_epi16 (__m256i __A)
2254 {
2255 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2256 (__v8hi)_mm_undefined_si128(),
2257 (__mmask8) -1);
2258 }
2259
2260 extern __inline void
2261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2263 {
2264 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2265 }
2266
2267 extern __inline __m128i
2268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2270 {
2271 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2272 (__v8hi) __O, __M);
2273 }
2274
2275 extern __inline __m128i
2276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2277 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2278 {
2279 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2280 (__v8hi)
2281 _mm_setzero_si128 (),
2282 __M);
2283 }
2284
2285 extern __inline __m128i
2286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2287 _mm_cvtepi64_epi32 (__m128i __A)
2288 {
2289 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2290 (__v4si)_mm_undefined_si128(),
2291 (__mmask8) -1);
2292 }
2293
2294 extern __inline void
2295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2297 {
2298 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2299 }
2300
2301 extern __inline __m128i
2302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2303 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2304 {
2305 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2306 (__v4si) __O, __M);
2307 }
2308
2309 extern __inline __m128i
2310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2312 {
2313 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314 (__v4si)
2315 _mm_setzero_si128 (),
2316 __M);
2317 }
2318
2319 extern __inline __m128i
2320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2321 _mm256_cvtepi64_epi32 (__m256i __A)
2322 {
2323 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2324 (__v4si)_mm_undefined_si128(),
2325 (__mmask8) -1);
2326 }
2327
2328 extern __inline void
2329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2330 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2331 {
2332 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2333 }
2334
2335 extern __inline __m128i
2336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2337 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2338 {
2339 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2340 (__v4si) __O, __M);
2341 }
2342
2343 extern __inline __m128i
2344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2345 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2346 {
2347 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2348 (__v4si)
2349 _mm_setzero_si128 (),
2350 __M);
2351 }
2352
2353 extern __inline __m128i
2354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2355 _mm_cvtsepi64_epi32 (__m128i __A)
2356 {
2357 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2358 (__v4si)_mm_undefined_si128(),
2359 (__mmask8) -1);
2360 }
2361
2362 extern __inline void
2363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2365 {
2366 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2367 }
2368
2369 extern __inline __m128i
2370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2371 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2372 {
2373 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2374 (__v4si) __O, __M);
2375 }
2376
2377 extern __inline __m128i
2378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2379 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2380 {
2381 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2382 (__v4si)
2383 _mm_setzero_si128 (),
2384 __M);
2385 }
2386
2387 extern __inline __m128i
2388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389 _mm256_cvtsepi64_epi32 (__m256i __A)
2390 {
2391 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2392 (__v4si)_mm_undefined_si128(),
2393 (__mmask8) -1);
2394 }
2395
2396 extern __inline void
2397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2398 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2399 {
2400 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2401 }
2402
2403 extern __inline __m128i
2404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2405 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2406 {
2407 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2408 (__v4si)__O,
2409 __M);
2410 }
2411
2412 extern __inline __m128i
2413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2414 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2415 {
2416 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2417 (__v4si)
2418 _mm_setzero_si128 (),
2419 __M);
2420 }
2421
2422 extern __inline __m128i
2423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2424 _mm_cvtusepi64_epi32 (__m128i __A)
2425 {
2426 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2427 (__v4si)_mm_undefined_si128(),
2428 (__mmask8) -1);
2429 }
2430
2431 extern __inline void
2432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2433 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2434 {
2435 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2436 }
2437
2438 extern __inline __m128i
2439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2440 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2441 {
2442 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2443 (__v4si) __O, __M);
2444 }
2445
2446 extern __inline __m128i
2447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2448 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2449 {
2450 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2451 (__v4si)
2452 _mm_setzero_si128 (),
2453 __M);
2454 }
2455
2456 extern __inline __m128i
2457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2458 _mm256_cvtusepi64_epi32 (__m256i __A)
2459 {
2460 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2461 (__v4si)_mm_undefined_si128(),
2462 (__mmask8) -1);
2463 }
2464
2465 extern __inline void
2466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2467 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2468 {
2469 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2470 }
2471
2472 extern __inline __m128i
2473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2474 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2475 {
2476 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2477 (__v4si) __O, __M);
2478 }
2479
2480 extern __inline __m128i
2481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2482 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2483 {
2484 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2485 (__v4si)
2486 _mm_setzero_si128 (),
2487 __M);
2488 }
2489
2490 extern __inline __m256
2491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2492 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2493 {
2494 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2495 (__v8sf) __O,
2496 __M);
2497 }
2498
2499 extern __inline __m256
2500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2501 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2502 {
2503 return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2504 (__v8sf)
2505 _mm256_setzero_ps (),
2506 __M);
2507 }
2508
2509 extern __inline __m128
2510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2511 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2512 {
2513 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2514 (__v4sf) __O,
2515 __M);
2516 }
2517
2518 extern __inline __m128
2519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2521 {
2522 return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2523 (__v4sf)
2524 _mm_setzero_ps (),
2525 __M);
2526 }
2527
2528 extern __inline __m256d
2529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2530 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2531 {
2532 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2533 (__v4df) __O,
2534 __M);
2535 }
2536
2537 extern __inline __m256d
2538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2540 {
2541 return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2542 (__v4df)
2543 _mm256_setzero_pd (),
2544 __M);
2545 }
2546
2547 extern __inline __m256i
2548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2549 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2550 {
2551 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2552 (__v8si) __O,
2553 __M);
2554 }
2555
2556 extern __inline __m256i
2557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2558 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2559 {
2560 return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2561 (__v8si)
2562 _mm256_setzero_si256 (),
2563 __M);
2564 }
2565
2566 extern __inline __m256i
2567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2568 _mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2569 {
2570 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2571 __M);
2572 }
2573
2574 extern __inline __m256i
2575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2576 _mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2577 {
2578 return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2579 (__v8si)
2580 _mm256_setzero_si256 (),
2581 __M);
2582 }
2583
2584 extern __inline __m128i
2585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2587 {
2588 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2589 (__v4si) __O,
2590 __M);
2591 }
2592
2593 extern __inline __m128i
2594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2595 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2596 {
2597 return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2598 (__v4si)
2599 _mm_setzero_si128 (),
2600 __M);
2601 }
2602
2603 extern __inline __m128i
2604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605 _mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2606 {
2607 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2608 __M);
2609 }
2610
2611 extern __inline __m128i
2612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2613 _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2614 {
2615 return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2616 (__v4si)
2617 _mm_setzero_si128 (),
2618 __M);
2619 }
2620
2621 extern __inline __m256i
2622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2623 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2624 {
2625 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2626 (__v4di) __O,
2627 __M);
2628 }
2629
2630 extern __inline __m256i
2631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2632 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2633 {
2634 return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2635 (__v4di)
2636 _mm256_setzero_si256 (),
2637 __M);
2638 }
2639
2640 extern __inline __m256i
2641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2643 {
2644 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2645 __M);
2646 }
2647
2648 extern __inline __m256i
2649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2651 {
2652 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2653 (__v4di)
2654 _mm256_setzero_si256 (),
2655 __M);
2656 }
2657
2658 extern __inline __m128i
2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2660 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2661 {
2662 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2663 (__v2di) __O,
2664 __M);
2665 }
2666
2667 extern __inline __m128i
2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2669 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2670 {
2671 return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2672 (__v2di)
2673 _mm_setzero_si128 (),
2674 __M);
2675 }
2676
2677 extern __inline __m128i
2678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2679 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2680 {
2681 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2682 __M);
2683 }
2684
2685 extern __inline __m128i
2686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2687 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2688 {
2689 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2690 (__v2di)
2691 _mm_setzero_si128 (),
2692 __M);
2693 }
2694
2695 extern __inline __m256
2696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2697 _mm256_broadcast_f32x4 (__m128 __A)
2698 {
2699 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2700 (__v8sf)_mm256_undefined_pd (),
2701 (__mmask8) -
2702 1);
2703 }
2704
2705 extern __inline __m256
2706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707 _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2708 {
2709 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2710 (__v8sf) __O,
2711 __M);
2712 }
2713
2714 extern __inline __m256
2715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2716 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2717 {
2718 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2719 (__v8sf)
2720 _mm256_setzero_ps (),
2721 __M);
2722 }
2723
2724 extern __inline __m256i
2725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2726 _mm256_broadcast_i32x4 (__m128i __A)
2727 {
2728 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2729 __A,
2730 (__v8si)_mm256_undefined_si256 (),
2731 (__mmask8) -
2732 1);
2733 }
2734
2735 extern __inline __m256i
2736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2737 _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2738 {
2739 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2740 __A,
2741 (__v8si)
2742 __O, __M);
2743 }
2744
2745 extern __inline __m256i
2746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2747 _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2748 {
2749 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2750 __A,
2751 (__v8si)
2752 _mm256_setzero_si256 (),
2753 __M);
2754 }
2755
2756 extern __inline __m256i
2757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2759 {
2760 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2761 (__v8si) __W,
2762 (__mmask8) __U);
2763 }
2764
2765 extern __inline __m256i
2766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2767 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2768 {
2769 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2770 (__v8si)
2771 _mm256_setzero_si256 (),
2772 (__mmask8) __U);
2773 }
2774
2775 extern __inline __m128i
2776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777 _mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2778 {
2779 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2780 (__v4si) __W,
2781 (__mmask8) __U);
2782 }
2783
2784 extern __inline __m128i
2785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2786 _mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2787 {
2788 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2789 (__v4si)
2790 _mm_setzero_si128 (),
2791 (__mmask8) __U);
2792 }
2793
2794 extern __inline __m256i
2795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2796 _mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2797 {
2798 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2799 (__v4di) __W,
2800 (__mmask8) __U);
2801 }
2802
2803 extern __inline __m256i
2804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2805 _mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2806 {
2807 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2808 (__v4di)
2809 _mm256_setzero_si256 (),
2810 (__mmask8) __U);
2811 }
2812
2813 extern __inline __m128i
2814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2815 _mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2816 {
2817 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2818 (__v2di) __W,
2819 (__mmask8) __U);
2820 }
2821
2822 extern __inline __m128i
2823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2824 _mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2825 {
2826 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2827 (__v2di)
2828 _mm_setzero_si128 (),
2829 (__mmask8) __U);
2830 }
2831
2832 extern __inline __m256i
2833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2834 _mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2835 {
2836 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2837 (__v8si) __W,
2838 (__mmask8) __U);
2839 }
2840
2841 extern __inline __m256i
2842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2843 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2844 {
2845 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2846 (__v8si)
2847 _mm256_setzero_si256 (),
2848 (__mmask8) __U);
2849 }
2850
2851 extern __inline __m128i
2852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2853 _mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2854 {
2855 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2856 (__v4si) __W,
2857 (__mmask8) __U);
2858 }
2859
2860 extern __inline __m128i
2861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2862 _mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2863 {
2864 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2865 (__v4si)
2866 _mm_setzero_si128 (),
2867 (__mmask8) __U);
2868 }
2869
2870 extern __inline __m256i
2871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2872 _mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2873 {
2874 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2875 (__v4di) __W,
2876 (__mmask8) __U);
2877 }
2878
2879 extern __inline __m256i
2880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2881 _mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2882 {
2883 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2884 (__v4di)
2885 _mm256_setzero_si256 (),
2886 (__mmask8) __U);
2887 }
2888
2889 extern __inline __m128i
2890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2891 _mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2892 {
2893 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2894 (__v2di) __W,
2895 (__mmask8) __U);
2896 }
2897
2898 extern __inline __m128i
2899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2900 _mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2901 {
2902 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2903 (__v2di)
2904 _mm_setzero_si128 (),
2905 (__mmask8) __U);
2906 }
2907
2908 extern __inline __m256i
2909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2910 _mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2911 {
2912 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2913 (__v4di) __W,
2914 (__mmask8) __U);
2915 }
2916
2917 extern __inline __m256i
2918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2919 _mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2920 {
2921 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2922 (__v4di)
2923 _mm256_setzero_si256 (),
2924 (__mmask8) __U);
2925 }
2926
2927 extern __inline __m128i
2928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2929 _mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2930 {
2931 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2932 (__v2di) __W,
2933 (__mmask8) __U);
2934 }
2935
2936 extern __inline __m128i
2937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938 _mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2939 {
2940 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2941 (__v2di)
2942 _mm_setzero_si128 (),
2943 (__mmask8) __U);
2944 }
2945
2946 extern __inline __m256i
2947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2948 _mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2949 {
2950 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2951 (__v8si) __W,
2952 (__mmask8) __U);
2953 }
2954
2955 extern __inline __m256i
2956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2957 _mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2958 {
2959 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2960 (__v8si)
2961 _mm256_setzero_si256 (),
2962 (__mmask8) __U);
2963 }
2964
2965 extern __inline __m128i
2966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2967 _mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2968 {
2969 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2970 (__v4si) __W,
2971 (__mmask8) __U);
2972 }
2973
2974 extern __inline __m128i
2975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2976 _mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2977 {
2978 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2979 (__v4si)
2980 _mm_setzero_si128 (),
2981 (__mmask8) __U);
2982 }
2983
2984 extern __inline __m256i
2985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2986 _mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2987 {
2988 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2989 (__v4di) __W,
2990 (__mmask8) __U);
2991 }
2992
2993 extern __inline __m256i
2994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2995 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
2996 {
2997 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
2998 (__v4di)
2999 _mm256_setzero_si256 (),
3000 (__mmask8) __U);
3001 }
3002
3003 extern __inline __m128i
3004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3005 _mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3006 {
3007 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3008 (__v2di) __W,
3009 (__mmask8) __U);
3010 }
3011
3012 extern __inline __m128i
3013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3014 _mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3015 {
3016 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3017 (__v2di)
3018 _mm_setzero_si128 (),
3019 (__mmask8) __U);
3020 }
3021
3022 extern __inline __m256i
3023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024 _mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3025 {
3026 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3027 (__v8si) __W,
3028 (__mmask8) __U);
3029 }
3030
3031 extern __inline __m256i
3032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3033 _mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3034 {
3035 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3036 (__v8si)
3037 _mm256_setzero_si256 (),
3038 (__mmask8) __U);
3039 }
3040
3041 extern __inline __m128i
3042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3043 _mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3044 {
3045 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3046 (__v4si) __W,
3047 (__mmask8) __U);
3048 }
3049
3050 extern __inline __m128i
3051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3052 _mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3053 {
3054 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3055 (__v4si)
3056 _mm_setzero_si128 (),
3057 (__mmask8) __U);
3058 }
3059
3060 extern __inline __m256i
3061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3062 _mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3063 {
3064 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3065 (__v4di) __W,
3066 (__mmask8) __U);
3067 }
3068
3069 extern __inline __m256i
3070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3071 _mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3072 {
3073 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3074 (__v4di)
3075 _mm256_setzero_si256 (),
3076 (__mmask8) __U);
3077 }
3078
3079 extern __inline __m128i
3080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3081 _mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3082 {
3083 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3084 (__v2di) __W,
3085 (__mmask8) __U);
3086 }
3087
3088 extern __inline __m128i
3089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3090 _mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3091 {
3092 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3093 (__v2di)
3094 _mm_setzero_si128 (),
3095 (__mmask8) __U);
3096 }
3097
3098 extern __inline __m256i
3099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3100 _mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3101 {
3102 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3103 (__v4di) __W,
3104 (__mmask8) __U);
3105 }
3106
3107 extern __inline __m256i
3108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3109 _mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3110 {
3111 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3112 (__v4di)
3113 _mm256_setzero_si256 (),
3114 (__mmask8) __U);
3115 }
3116
3117 extern __inline __m128i
3118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3119 _mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3120 {
3121 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3122 (__v2di) __W,
3123 (__mmask8) __U);
3124 }
3125
3126 extern __inline __m128i
3127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3128 _mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3129 {
3130 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3131 (__v2di)
3132 _mm_setzero_si128 (),
3133 (__mmask8) __U);
3134 }
3135
3136 extern __inline __m256d
3137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3138 _mm256_rcp14_pd (__m256d __A)
3139 {
3140 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3141 (__v4df)
3142 _mm256_setzero_pd (),
3143 (__mmask8) -1);
3144 }
3145
3146 extern __inline __m256d
3147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3148 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3149 {
3150 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3151 (__v4df) __W,
3152 (__mmask8) __U);
3153 }
3154
3155 extern __inline __m256d
3156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3157 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3158 {
3159 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3160 (__v4df)
3161 _mm256_setzero_pd (),
3162 (__mmask8) __U);
3163 }
3164
3165 extern __inline __m128d
3166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3167 _mm_rcp14_pd (__m128d __A)
3168 {
3169 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3170 (__v2df)
3171 _mm_setzero_pd (),
3172 (__mmask8) -1);
3173 }
3174
3175 extern __inline __m128d
3176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3178 {
3179 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3180 (__v2df) __W,
3181 (__mmask8) __U);
3182 }
3183
3184 extern __inline __m128d
3185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3186 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3187 {
3188 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3189 (__v2df)
3190 _mm_setzero_pd (),
3191 (__mmask8) __U);
3192 }
3193
3194 extern __inline __m256
3195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196 _mm256_rcp14_ps (__m256 __A)
3197 {
3198 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3199 (__v8sf)
3200 _mm256_setzero_ps (),
3201 (__mmask8) -1);
3202 }
3203
3204 extern __inline __m256
3205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3206 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3207 {
3208 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3209 (__v8sf) __W,
3210 (__mmask8) __U);
3211 }
3212
3213 extern __inline __m256
3214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3215 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3216 {
3217 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3218 (__v8sf)
3219 _mm256_setzero_ps (),
3220 (__mmask8) __U);
3221 }
3222
3223 extern __inline __m128
3224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3225 _mm_rcp14_ps (__m128 __A)
3226 {
3227 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3228 (__v4sf)
3229 _mm_setzero_ps (),
3230 (__mmask8) -1);
3231 }
3232
3233 extern __inline __m128
3234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3235 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3236 {
3237 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3238 (__v4sf) __W,
3239 (__mmask8) __U);
3240 }
3241
3242 extern __inline __m128
3243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3244 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3245 {
3246 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3247 (__v4sf)
3248 _mm_setzero_ps (),
3249 (__mmask8) __U);
3250 }
3251
3252 extern __inline __m256d
3253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3254 _mm256_rsqrt14_pd (__m256d __A)
3255 {
3256 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3257 (__v4df)
3258 _mm256_setzero_pd (),
3259 (__mmask8) -1);
3260 }
3261
3262 extern __inline __m256d
3263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3264 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3265 {
3266 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3267 (__v4df) __W,
3268 (__mmask8) __U);
3269 }
3270
3271 extern __inline __m256d
3272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3273 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3274 {
3275 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3276 (__v4df)
3277 _mm256_setzero_pd (),
3278 (__mmask8) __U);
3279 }
3280
3281 extern __inline __m128d
3282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3283 _mm_rsqrt14_pd (__m128d __A)
3284 {
3285 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3286 (__v2df)
3287 _mm_setzero_pd (),
3288 (__mmask8) -1);
3289 }
3290
3291 extern __inline __m128d
3292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3293 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3294 {
3295 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3296 (__v2df) __W,
3297 (__mmask8) __U);
3298 }
3299
3300 extern __inline __m128d
3301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3302 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3303 {
3304 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3305 (__v2df)
3306 _mm_setzero_pd (),
3307 (__mmask8) __U);
3308 }
3309
3310 extern __inline __m256
3311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3312 _mm256_rsqrt14_ps (__m256 __A)
3313 {
3314 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3315 (__v8sf)
3316 _mm256_setzero_ps (),
3317 (__mmask8) -1);
3318 }
3319
3320 extern __inline __m256
3321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3322 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3323 {
3324 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3325 (__v8sf) __W,
3326 (__mmask8) __U);
3327 }
3328
3329 extern __inline __m256
3330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3332 {
3333 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3334 (__v8sf)
3335 _mm256_setzero_ps (),
3336 (__mmask8) __U);
3337 }
3338
3339 extern __inline __m128
3340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3341 _mm_rsqrt14_ps (__m128 __A)
3342 {
3343 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3344 (__v4sf)
3345 _mm_setzero_ps (),
3346 (__mmask8) -1);
3347 }
3348
3349 extern __inline __m128
3350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3351 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3352 {
3353 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3354 (__v4sf) __W,
3355 (__mmask8) __U);
3356 }
3357
3358 extern __inline __m128
3359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3360 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3361 {
3362 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3363 (__v4sf)
3364 _mm_setzero_ps (),
3365 (__mmask8) __U);
3366 }
3367
3368 extern __inline __m256d
3369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3370 _mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3371 {
3372 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3373 (__v4df) __W,
3374 (__mmask8) __U);
3375 }
3376
3377 extern __inline __m256d
3378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3379 _mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3380 {
3381 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3382 (__v4df)
3383 _mm256_setzero_pd (),
3384 (__mmask8) __U);
3385 }
3386
3387 extern __inline __m128d
3388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3389 _mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3390 {
3391 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3392 (__v2df) __W,
3393 (__mmask8) __U);
3394 }
3395
3396 extern __inline __m128d
3397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3398 _mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3399 {
3400 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3401 (__v2df)
3402 _mm_setzero_pd (),
3403 (__mmask8) __U);
3404 }
3405
3406 extern __inline __m256
3407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3408 _mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3409 {
3410 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3411 (__v8sf) __W,
3412 (__mmask8) __U);
3413 }
3414
3415 extern __inline __m256
3416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3417 _mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3418 {
3419 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3420 (__v8sf)
3421 _mm256_setzero_ps (),
3422 (__mmask8) __U);
3423 }
3424
3425 extern __inline __m128
3426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3427 _mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3428 {
3429 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3430 (__v4sf) __W,
3431 (__mmask8) __U);
3432 }
3433
3434 extern __inline __m128
3435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3436 _mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3437 {
3438 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3439 (__v4sf)
3440 _mm_setzero_ps (),
3441 (__mmask8) __U);
3442 }
3443
3444 extern __inline __m256i
3445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3446 _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3447 __m256i __B)
3448 {
3449 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3450 (__v8si) __B,
3451 (__v8si) __W,
3452 (__mmask8) __U);
3453 }
3454
3455 extern __inline __m256i
3456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457 _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3458 {
3459 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3460 (__v8si) __B,
3461 (__v8si)
3462 _mm256_setzero_si256 (),
3463 (__mmask8) __U);
3464 }
3465
3466 extern __inline __m256i
3467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468 _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3469 __m256i __B)
3470 {
3471 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3472 (__v4di) __B,
3473 (__v4di) __W,
3474 (__mmask8) __U);
3475 }
3476
3477 extern __inline __m256i
3478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479 _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3480 {
3481 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3482 (__v4di) __B,
3483 (__v4di)
3484 _mm256_setzero_si256 (),
3485 (__mmask8) __U);
3486 }
3487
3488 extern __inline __m256i
3489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490 _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3491 __m256i __B)
3492 {
3493 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3494 (__v8si) __B,
3495 (__v8si) __W,
3496 (__mmask8) __U);
3497 }
3498
3499 extern __inline __m256i
3500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501 _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3502 {
3503 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3504 (__v8si) __B,
3505 (__v8si)
3506 _mm256_setzero_si256 (),
3507 (__mmask8) __U);
3508 }
3509
3510 extern __inline __m256i
3511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512 _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3513 __m256i __B)
3514 {
3515 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3516 (__v4di) __B,
3517 (__v4di) __W,
3518 (__mmask8) __U);
3519 }
3520
3521 extern __inline __m256i
3522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523 _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3524 {
3525 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3526 (__v4di) __B,
3527 (__v4di)
3528 _mm256_setzero_si256 (),
3529 (__mmask8) __U);
3530 }
3531
3532 extern __inline __m128i
3533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534 _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3535 __m128i __B)
3536 {
3537 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3538 (__v4si) __B,
3539 (__v4si) __W,
3540 (__mmask8) __U);
3541 }
3542
3543 extern __inline __m128i
3544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545 _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3546 {
3547 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3548 (__v4si) __B,
3549 (__v4si)
3550 _mm_setzero_si128 (),
3551 (__mmask8) __U);
3552 }
3553
3554 extern __inline __m128i
3555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556 _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3557 __m128i __B)
3558 {
3559 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3560 (__v2di) __B,
3561 (__v2di) __W,
3562 (__mmask8) __U);
3563 }
3564
3565 extern __inline __m128i
3566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567 _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3568 {
3569 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3570 (__v2di) __B,
3571 (__v2di)
3572 _mm_setzero_si128 (),
3573 (__mmask8) __U);
3574 }
3575
3576 extern __inline __m128i
3577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578 _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3579 __m128i __B)
3580 {
3581 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3582 (__v4si) __B,
3583 (__v4si) __W,
3584 (__mmask8) __U);
3585 }
3586
3587 extern __inline __m128i
3588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589 _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3590 {
3591 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3592 (__v4si) __B,
3593 (__v4si)
3594 _mm_setzero_si128 (),
3595 (__mmask8) __U);
3596 }
3597
3598 extern __inline __m128i
3599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600 _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3601 __m128i __B)
3602 {
3603 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3604 (__v2di) __B,
3605 (__v2di) __W,
3606 (__mmask8) __U);
3607 }
3608
3609 extern __inline __m128i
3610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611 _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3612 {
3613 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3614 (__v2di) __B,
3615 (__v2di)
3616 _mm_setzero_si128 (),
3617 (__mmask8) __U);
3618 }
3619
3620 extern __inline __m256
3621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622 _mm256_getexp_ps (__m256 __A)
3623 {
3624 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3625 (__v8sf)
3626 _mm256_setzero_ps (),
3627 (__mmask8) -1);
3628 }
3629
3630 extern __inline __m256
3631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3633 {
3634 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3635 (__v8sf) __W,
3636 (__mmask8) __U);
3637 }
3638
3639 extern __inline __m256
3640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3642 {
3643 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3644 (__v8sf)
3645 _mm256_setzero_ps (),
3646 (__mmask8) __U);
3647 }
3648
3649 extern __inline __m256d
3650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651 _mm256_getexp_pd (__m256d __A)
3652 {
3653 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3654 (__v4df)
3655 _mm256_setzero_pd (),
3656 (__mmask8) -1);
3657 }
3658
3659 extern __inline __m256d
3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3662 {
3663 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3664 (__v4df) __W,
3665 (__mmask8) __U);
3666 }
3667
3668 extern __inline __m256d
3669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3670 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3671 {
3672 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3673 (__v4df)
3674 _mm256_setzero_pd (),
3675 (__mmask8) __U);
3676 }
3677
3678 extern __inline __m128
3679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680 _mm_getexp_ps (__m128 __A)
3681 {
3682 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3683 (__v4sf)
3684 _mm_setzero_ps (),
3685 (__mmask8) -1);
3686 }
3687
3688 extern __inline __m128
3689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3691 {
3692 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3693 (__v4sf) __W,
3694 (__mmask8) __U);
3695 }
3696
3697 extern __inline __m128
3698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3699 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3700 {
3701 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3702 (__v4sf)
3703 _mm_setzero_ps (),
3704 (__mmask8) __U);
3705 }
3706
3707 extern __inline __m128d
3708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709 _mm_getexp_pd (__m128d __A)
3710 {
3711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3712 (__v2df)
3713 _mm_setzero_pd (),
3714 (__mmask8) -1);
3715 }
3716
3717 extern __inline __m128d
3718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3720 {
3721 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3722 (__v2df) __W,
3723 (__mmask8) __U);
3724 }
3725
3726 extern __inline __m128d
3727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3729 {
3730 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3731 (__v2df)
3732 _mm_setzero_pd (),
3733 (__mmask8) __U);
3734 }
3735
3736 extern __inline __m256i
3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738 _mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3739 __m128i __B)
3740 {
3741 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3742 (__v4si) __B,
3743 (__v8si) __W,
3744 (__mmask8) __U);
3745 }
3746
3747 extern __inline __m256i
3748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3749 _mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3750 {
3751 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3752 (__v4si) __B,
3753 (__v8si)
3754 _mm256_setzero_si256 (),
3755 (__mmask8) __U);
3756 }
3757
3758 extern __inline __m128i
3759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3760 _mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3761 __m128i __B)
3762 {
3763 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3764 (__v4si) __B,
3765 (__v4si) __W,
3766 (__mmask8) __U);
3767 }
3768
3769 extern __inline __m128i
3770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771 _mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3772 {
3773 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3774 (__v4si) __B,
3775 (__v4si)
3776 _mm_setzero_si128 (),
3777 (__mmask8) __U);
3778 }
3779
3780 extern __inline __m256i
3781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782 _mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3783 __m128i __B)
3784 {
3785 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3786 (__v2di) __B,
3787 (__v4di) __W,
3788 (__mmask8) __U);
3789 }
3790
3791 extern __inline __m256i
3792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793 _mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3794 {
3795 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3796 (__v2di) __B,
3797 (__v4di)
3798 _mm256_setzero_si256 (),
3799 (__mmask8) __U);
3800 }
3801
3802 extern __inline __m128i
3803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804 _mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3805 __m128i __B)
3806 {
3807 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3808 (__v2di) __B,
3809 (__v2di) __W,
3810 (__mmask8) __U);
3811 }
3812
3813 extern __inline __m128i
3814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815 _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3816 {
3817 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3818 (__v2di) __B,
3819 (__v2di)
3820 _mm_setzero_di (),
3821 (__mmask8) __U);
3822 }
3823
3824 extern __inline __m256i
3825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3826 _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3827 __m256i __B)
3828 {
3829 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3830 (__v8si) __B,
3831 (__v8si) __W,
3832 (__mmask8) __U);
3833 }
3834
3835 extern __inline __m256i
3836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837 _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3838 {
3839 return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3840 (__v8si) __B,
3841 (__v8si)
3842 _mm256_setzero_si256 (),
3843 (__mmask8) __U);
3844 }
3845
3846 extern __inline __m256d
3847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848 _mm256_scalef_pd (__m256d __A, __m256d __B)
3849 {
3850 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3851 (__v4df) __B,
3852 (__v4df)
3853 _mm256_setzero_pd (),
3854 (__mmask8) -1);
3855 }
3856
3857 extern __inline __m256d
3858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3860 __m256d __B)
3861 {
3862 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3863 (__v4df) __B,
3864 (__v4df) __W,
3865 (__mmask8) __U);
3866 }
3867
3868 extern __inline __m256d
3869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3871 {
3872 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3873 (__v4df) __B,
3874 (__v4df)
3875 _mm256_setzero_pd (),
3876 (__mmask8) __U);
3877 }
3878
3879 extern __inline __m256
3880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881 _mm256_scalef_ps (__m256 __A, __m256 __B)
3882 {
3883 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3884 (__v8sf) __B,
3885 (__v8sf)
3886 _mm256_setzero_ps (),
3887 (__mmask8) -1);
3888 }
3889
3890 extern __inline __m256
3891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3893 __m256 __B)
3894 {
3895 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3896 (__v8sf) __B,
3897 (__v8sf) __W,
3898 (__mmask8) __U);
3899 }
3900
3901 extern __inline __m256
3902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3904 {
3905 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3906 (__v8sf) __B,
3907 (__v8sf)
3908 _mm256_setzero_ps (),
3909 (__mmask8) __U);
3910 }
3911
3912 extern __inline __m128d
3913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3914 _mm_scalef_pd (__m128d __A, __m128d __B)
3915 {
3916 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3917 (__v2df) __B,
3918 (__v2df)
3919 _mm_setzero_pd (),
3920 (__mmask8) -1);
3921 }
3922
3923 extern __inline __m128d
3924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3926 __m128d __B)
3927 {
3928 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3929 (__v2df) __B,
3930 (__v2df) __W,
3931 (__mmask8) __U);
3932 }
3933
3934 extern __inline __m128d
3935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3937 {
3938 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3939 (__v2df) __B,
3940 (__v2df)
3941 _mm_setzero_pd (),
3942 (__mmask8) __U);
3943 }
3944
3945 extern __inline __m128
3946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3947 _mm_scalef_ps (__m128 __A, __m128 __B)
3948 {
3949 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3950 (__v4sf) __B,
3951 (__v4sf)
3952 _mm_setzero_ps (),
3953 (__mmask8) -1);
3954 }
3955
3956 extern __inline __m128
3957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3958 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3959 {
3960 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3961 (__v4sf) __B,
3962 (__v4sf) __W,
3963 (__mmask8) __U);
3964 }
3965
3966 extern __inline __m128
3967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3968 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3969 {
3970 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3971 (__v4sf) __B,
3972 (__v4sf)
3973 _mm_setzero_ps (),
3974 (__mmask8) __U);
3975 }
3976
3977 extern __inline __m256d
3978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979 _mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3980 __m256d __C)
3981 {
3982 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3983 (__v4df) __B,
3984 (__v4df) __C,
3985 (__mmask8) __U);
3986 }
3987
3988 extern __inline __m256d
3989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990 _mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
3991 __mmask8 __U)
3992 {
3993 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
3994 (__v4df) __B,
3995 (__v4df) __C,
3996 (__mmask8) __U);
3997 }
3998
3999 extern __inline __m256d
4000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001 _mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4002 __m256d __C)
4003 {
4004 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4005 (__v4df) __B,
4006 (__v4df) __C,
4007 (__mmask8) __U);
4008 }
4009
4010 extern __inline __m128d
4011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012 _mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4013 {
4014 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4015 (__v2df) __B,
4016 (__v2df) __C,
4017 (__mmask8) __U);
4018 }
4019
4020 extern __inline __m128d
4021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4022 _mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4023 __mmask8 __U)
4024 {
4025 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4026 (__v2df) __B,
4027 (__v2df) __C,
4028 (__mmask8) __U);
4029 }
4030
4031 extern __inline __m128d
4032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033 _mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4034 __m128d __C)
4035 {
4036 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4037 (__v2df) __B,
4038 (__v2df) __C,
4039 (__mmask8) __U);
4040 }
4041
4042 extern __inline __m256
4043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4044 _mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4045 {
4046 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4047 (__v8sf) __B,
4048 (__v8sf) __C,
4049 (__mmask8) __U);
4050 }
4051
4052 extern __inline __m256
4053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4054 _mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4055 __mmask8 __U)
4056 {
4057 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4058 (__v8sf) __B,
4059 (__v8sf) __C,
4060 (__mmask8) __U);
4061 }
4062
4063 extern __inline __m256
4064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065 _mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4066 __m256 __C)
4067 {
4068 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4069 (__v8sf) __B,
4070 (__v8sf) __C,
4071 (__mmask8) __U);
4072 }
4073
4074 extern __inline __m128
4075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076 _mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4077 {
4078 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4079 (__v4sf) __B,
4080 (__v4sf) __C,
4081 (__mmask8) __U);
4082 }
4083
4084 extern __inline __m128
4085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4086 _mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4087 {
4088 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4089 (__v4sf) __B,
4090 (__v4sf) __C,
4091 (__mmask8) __U);
4092 }
4093
4094 extern __inline __m128
4095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096 _mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4097 {
4098 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4099 (__v4sf) __B,
4100 (__v4sf) __C,
4101 (__mmask8) __U);
4102 }
4103
4104 extern __inline __m256d
4105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4106 _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4107 __m256d __C)
4108 {
4109 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
4110 (__v4df) __B,
4111 -(__v4df) __C,
4112 (__mmask8) __U);
4113 }
4114
4115 extern __inline __m256d
4116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4117 _mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4118 __mmask8 __U)
4119 {
4120 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4121 (__v4df) __B,
4122 (__v4df) __C,
4123 (__mmask8) __U);
4124 }
4125
4126 extern __inline __m256d
4127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4128 _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4129 __m256d __C)
4130 {
4131 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4132 (__v4df) __B,
4133 -(__v4df) __C,
4134 (__mmask8) __U);
4135 }
4136
4137 extern __inline __m128d
4138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139 _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4140 {
4141 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4142 (__v2df) __B,
4143 -(__v2df) __C,
4144 (__mmask8) __U);
4145 }
4146
4147 extern __inline __m128d
4148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4149 _mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4150 __mmask8 __U)
4151 {
4152 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4153 (__v2df) __B,
4154 (__v2df) __C,
4155 (__mmask8) __U);
4156 }
4157
4158 extern __inline __m128d
4159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4160 _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4161 __m128d __C)
4162 {
4163 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4164 (__v2df) __B,
4165 -(__v2df) __C,
4166 (__mmask8) __U);
4167 }
4168
4169 extern __inline __m256
4170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171 _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4172 {
4173 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4174 (__v8sf) __B,
4175 -(__v8sf) __C,
4176 (__mmask8) __U);
4177 }
4178
4179 extern __inline __m256
4180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4181 _mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4182 __mmask8 __U)
4183 {
4184 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4185 (__v8sf) __B,
4186 (__v8sf) __C,
4187 (__mmask8) __U);
4188 }
4189
4190 extern __inline __m256
4191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192 _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4193 __m256 __C)
4194 {
4195 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4196 (__v8sf) __B,
4197 -(__v8sf) __C,
4198 (__mmask8) __U);
4199 }
4200
4201 extern __inline __m128
4202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203 _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4204 {
4205 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4206 (__v4sf) __B,
4207 -(__v4sf) __C,
4208 (__mmask8) __U);
4209 }
4210
4211 extern __inline __m128
4212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4213 _mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4214 {
4215 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4216 (__v4sf) __B,
4217 (__v4sf) __C,
4218 (__mmask8) __U);
4219 }
4220
4221 extern __inline __m128
4222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4223 _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4224 {
4225 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4226 (__v4sf) __B,
4227 -(__v4sf) __C,
4228 (__mmask8) __U);
4229 }
4230
4231 extern __inline __m256d
4232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4233 _mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4234 __m256d __C)
4235 {
4236 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4237 (__v4df) __B,
4238 (__v4df) __C,
4239 (__mmask8) __U);
4240 }
4241
4242 extern __inline __m256d
4243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244 _mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4245 __mmask8 __U)
4246 {
4247 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4248 (__v4df) __B,
4249 (__v4df) __C,
4250 (__mmask8)
4251 __U);
4252 }
4253
4254 extern __inline __m256d
4255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4256 _mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4257 __m256d __C)
4258 {
4259 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4260 (__v4df) __B,
4261 (__v4df) __C,
4262 (__mmask8)
4263 __U);
4264 }
4265
4266 extern __inline __m128d
4267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4268 _mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4269 __m128d __C)
4270 {
4271 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4272 (__v2df) __B,
4273 (__v2df) __C,
4274 (__mmask8) __U);
4275 }
4276
4277 extern __inline __m128d
4278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279 _mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4280 __mmask8 __U)
4281 {
4282 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4283 (__v2df) __B,
4284 (__v2df) __C,
4285 (__mmask8)
4286 __U);
4287 }
4288
4289 extern __inline __m128d
4290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4291 _mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4292 __m128d __C)
4293 {
4294 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4295 (__v2df) __B,
4296 (__v2df) __C,
4297 (__mmask8)
4298 __U);
4299 }
4300
4301 extern __inline __m256
4302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4303 _mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4304 __m256 __C)
4305 {
4306 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4307 (__v8sf) __B,
4308 (__v8sf) __C,
4309 (__mmask8) __U);
4310 }
4311
4312 extern __inline __m256
4313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314 _mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4315 __mmask8 __U)
4316 {
4317 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4318 (__v8sf) __B,
4319 (__v8sf) __C,
4320 (__mmask8) __U);
4321 }
4322
4323 extern __inline __m256
4324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325 _mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4326 __m256 __C)
4327 {
4328 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4329 (__v8sf) __B,
4330 (__v8sf) __C,
4331 (__mmask8) __U);
4332 }
4333
4334 extern __inline __m128
4335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336 _mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4337 {
4338 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4339 (__v4sf) __B,
4340 (__v4sf) __C,
4341 (__mmask8) __U);
4342 }
4343
4344 extern __inline __m128
4345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4346 _mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4347 __mmask8 __U)
4348 {
4349 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4350 (__v4sf) __B,
4351 (__v4sf) __C,
4352 (__mmask8) __U);
4353 }
4354
4355 extern __inline __m128
4356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4357 _mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4358 __m128 __C)
4359 {
4360 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4361 (__v4sf) __B,
4362 (__v4sf) __C,
4363 (__mmask8) __U);
4364 }
4365
4366 extern __inline __m256d
4367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368 _mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4369 __m256d __C)
4370 {
4371 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4372 (__v4df) __B,
4373 -(__v4df) __C,
4374 (__mmask8) __U);
4375 }
4376
4377 extern __inline __m256d
4378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379 _mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4380 __mmask8 __U)
4381 {
4382 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4383 (__v4df) __B,
4384 (__v4df) __C,
4385 (__mmask8)
4386 __U);
4387 }
4388
4389 extern __inline __m256d
4390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4391 _mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4392 __m256d __C)
4393 {
4394 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4395 (__v4df) __B,
4396 -(__v4df) __C,
4397 (__mmask8)
4398 __U);
4399 }
4400
4401 extern __inline __m128d
4402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4403 _mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4404 __m128d __C)
4405 {
4406 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4407 (__v2df) __B,
4408 -(__v2df) __C,
4409 (__mmask8) __U);
4410 }
4411
4412 extern __inline __m128d
4413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414 _mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4415 __mmask8 __U)
4416 {
4417 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4418 (__v2df) __B,
4419 (__v2df) __C,
4420 (__mmask8)
4421 __U);
4422 }
4423
4424 extern __inline __m128d
4425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4426 _mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4427 __m128d __C)
4428 {
4429 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4430 (__v2df) __B,
4431 -(__v2df) __C,
4432 (__mmask8)
4433 __U);
4434 }
4435
4436 extern __inline __m256
4437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4438 _mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4439 __m256 __C)
4440 {
4441 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4442 (__v8sf) __B,
4443 -(__v8sf) __C,
4444 (__mmask8) __U);
4445 }
4446
4447 extern __inline __m256
4448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449 _mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4450 __mmask8 __U)
4451 {
4452 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4453 (__v8sf) __B,
4454 (__v8sf) __C,
4455 (__mmask8) __U);
4456 }
4457
4458 extern __inline __m256
4459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460 _mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4461 __m256 __C)
4462 {
4463 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4464 (__v8sf) __B,
4465 -(__v8sf) __C,
4466 (__mmask8) __U);
4467 }
4468
4469 extern __inline __m128
4470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471 _mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4472 {
4473 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4474 (__v4sf) __B,
4475 -(__v4sf) __C,
4476 (__mmask8) __U);
4477 }
4478
4479 extern __inline __m128
4480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4481 _mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4482 __mmask8 __U)
4483 {
4484 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4485 (__v4sf) __B,
4486 (__v4sf) __C,
4487 (__mmask8) __U);
4488 }
4489
4490 extern __inline __m128
4491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492 _mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4493 __m128 __C)
4494 {
4495 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4496 (__v4sf) __B,
4497 -(__v4sf) __C,
4498 (__mmask8) __U);
4499 }
4500
4501 extern __inline __m256d
4502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503 _mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4504 __m256d __C)
4505 {
4506 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4507 (__v4df) __B,
4508 (__v4df) __C,
4509 (__mmask8) __U);
4510 }
4511
4512 extern __inline __m256d
4513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514 _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4515 __mmask8 __U)
4516 {
4517 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
4518 (__v4df) __B,
4519 (__v4df) __C,
4520 (__mmask8) __U);
4521 }
4522
4523 extern __inline __m256d
4524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525 _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4526 __m256d __C)
4527 {
4528 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4529 (__v4df) __B,
4530 (__v4df) __C,
4531 (__mmask8) __U);
4532 }
4533
4534 extern __inline __m128d
4535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536 _mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4537 __m128d __C)
4538 {
4539 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4540 (__v2df) __B,
4541 (__v2df) __C,
4542 (__mmask8) __U);
4543 }
4544
4545 extern __inline __m128d
4546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547 _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4548 __mmask8 __U)
4549 {
4550 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
4551 (__v2df) __B,
4552 (__v2df) __C,
4553 (__mmask8) __U);
4554 }
4555
4556 extern __inline __m128d
4557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4558 _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4559 __m128d __C)
4560 {
4561 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4562 (__v2df) __B,
4563 (__v2df) __C,
4564 (__mmask8) __U);
4565 }
4566
4567 extern __inline __m256
4568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569 _mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4570 __m256 __C)
4571 {
4572 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4573 (__v8sf) __B,
4574 (__v8sf) __C,
4575 (__mmask8) __U);
4576 }
4577
4578 extern __inline __m256
4579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580 _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4581 __mmask8 __U)
4582 {
4583 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
4584 (__v8sf) __B,
4585 (__v8sf) __C,
4586 (__mmask8) __U);
4587 }
4588
4589 extern __inline __m256
4590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591 _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4592 __m256 __C)
4593 {
4594 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4595 (__v8sf) __B,
4596 (__v8sf) __C,
4597 (__mmask8) __U);
4598 }
4599
4600 extern __inline __m128
4601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4602 _mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4603 {
4604 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4605 (__v4sf) __B,
4606 (__v4sf) __C,
4607 (__mmask8) __U);
4608 }
4609
4610 extern __inline __m128
4611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4612 _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4613 {
4614 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
4615 (__v4sf) __B,
4616 (__v4sf) __C,
4617 (__mmask8) __U);
4618 }
4619
4620 extern __inline __m128
4621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4622 _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4623 {
4624 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4625 (__v4sf) __B,
4626 (__v4sf) __C,
4627 (__mmask8) __U);
4628 }
4629
4630 extern __inline __m256d
4631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4632 _mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4633 __m256d __C)
4634 {
4635 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4636 (__v4df) __B,
4637 (__v4df) __C,
4638 (__mmask8) __U);
4639 }
4640
4641 extern __inline __m256d
4642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643 _mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4644 __mmask8 __U)
4645 {
4646 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4647 (__v4df) __B,
4648 (__v4df) __C,
4649 (__mmask8) __U);
4650 }
4651
4652 extern __inline __m256d
4653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654 _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4655 __m256d __C)
4656 {
4657 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
4658 (__v4df) __B,
4659 -(__v4df) __C,
4660 (__mmask8) __U);
4661 }
4662
4663 extern __inline __m128d
4664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665 _mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4666 __m128d __C)
4667 {
4668 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4669 (__v2df) __B,
4670 (__v2df) __C,
4671 (__mmask8) __U);
4672 }
4673
4674 extern __inline __m128d
4675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 _mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4677 __mmask8 __U)
4678 {
4679 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4680 (__v2df) __B,
4681 (__v2df) __C,
4682 (__mmask8) __U);
4683 }
4684
4685 extern __inline __m128d
4686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687 _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4688 __m128d __C)
4689 {
4690 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
4691 (__v2df) __B,
4692 -(__v2df) __C,
4693 (__mmask8) __U);
4694 }
4695
4696 extern __inline __m256
4697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698 _mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4699 __m256 __C)
4700 {
4701 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4702 (__v8sf) __B,
4703 (__v8sf) __C,
4704 (__mmask8) __U);
4705 }
4706
4707 extern __inline __m256
4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709 _mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4710 __mmask8 __U)
4711 {
4712 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4713 (__v8sf) __B,
4714 (__v8sf) __C,
4715 (__mmask8) __U);
4716 }
4717
4718 extern __inline __m256
4719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720 _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4721 __m256 __C)
4722 {
4723 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
4724 (__v8sf) __B,
4725 -(__v8sf) __C,
4726 (__mmask8) __U);
4727 }
4728
4729 extern __inline __m128
4730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4731 _mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4732 {
4733 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4734 (__v4sf) __B,
4735 (__v4sf) __C,
4736 (__mmask8) __U);
4737 }
4738
4739 extern __inline __m128
4740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4741 _mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4742 {
4743 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4744 (__v4sf) __B,
4745 (__v4sf) __C,
4746 (__mmask8) __U);
4747 }
4748
4749 extern __inline __m128
4750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4751 _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4752 {
4753 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
4754 (__v4sf) __B,
4755 -(__v4sf) __C,
4756 (__mmask8) __U);
4757 }
4758
4759 extern __inline __m128i
4760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4761 _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4762 __m128i __B)
4763 {
4764 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4765 (__v4si) __B,
4766 (__v4si) __W,
4767 (__mmask8) __U);
4768 }
4769
4770 extern __inline __m128i
4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772 _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4773 {
4774 return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4775 (__v4si) __B,
4776 (__v4si)
4777 _mm_setzero_si128 (),
4778 (__mmask8) __U);
4779 }
4780
4781 extern __inline __m256i
4782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783 _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4784 __m256i __B)
4785 {
4786 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4787 (__v8si) __B,
4788 (__v8si) __W,
4789 (__mmask8) __U);
4790 }
4791
4792 extern __inline __m256i
4793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794 _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4795 {
4796 return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4797 (__v8si) __B,
4798 (__v8si)
4799 _mm256_setzero_si256 (),
4800 (__mmask8) __U);
4801 }
4802
4803 extern __inline __m128i
4804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805 _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4806 __m128i __B)
4807 {
4808 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4809 (__v4si) __B,
4810 (__v4si) __W,
4811 (__mmask8) __U);
4812 }
4813
4814 extern __inline __m128i
4815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4817 {
4818 return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4819 (__v4si) __B,
4820 (__v4si)
4821 _mm_setzero_si128 (),
4822 (__mmask8) __U);
4823 }
4824
4825 extern __inline __m256i
4826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4828 __m256i __B)
4829 {
4830 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4831 (__v8si) __B,
4832 (__v8si) __W,
4833 (__mmask8) __U);
4834 }
4835
4836 extern __inline __m256i
4837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838 _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4839 {
4840 return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4841 (__v8si) __B,
4842 (__v8si)
4843 _mm256_setzero_si256 (),
4844 (__mmask8) __U);
4845 }
4846
4847 extern __inline __m128i
4848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849 _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4850 {
4851 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4852 (__v4si) __B,
4853 (__v4si) __W,
4854 (__mmask8) __U);
4855 }
4856
4857 extern __inline __m128i
4858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859 _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4860 {
4861 return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4862 (__v4si) __B,
4863 (__v4si)
4864 _mm_setzero_si128 (),
4865 (__mmask8) __U);
4866 }
4867
4868 extern __inline __m256i
4869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4870 _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4871 __m256i __B)
4872 {
4873 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4874 (__v8si) __B,
4875 (__v8si) __W,
4876 (__mmask8) __U);
4877 }
4878
4879 extern __inline __m256i
4880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4881 _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4882 {
4883 return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4884 (__v8si) __B,
4885 (__v8si)
4886 _mm256_setzero_si256 (),
4887 (__mmask8) __U);
4888 }
4889
4890 extern __inline __m128i
4891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4892 _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4893 __m128i __B)
4894 {
4895 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4896 (__v4si) __B,
4897 (__v4si) __W,
4898 (__mmask8) __U);
4899 }
4900
4901 extern __inline __m128i
4902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4903 _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4904 {
4905 return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4906 (__v4si) __B,
4907 (__v4si)
4908 _mm_setzero_si128 (),
4909 (__mmask8) __U);
4910 }
4911
4912 extern __inline __m128
4913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4914 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4915 {
4916 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4917 (__v4sf) __W,
4918 (__mmask8) __U);
4919 }
4920
4921 extern __inline __m128
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4924 {
4925 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4926 (__v4sf)
4927 _mm_setzero_ps (),
4928 (__mmask8) __U);
4929 }
4930
4931 extern __inline __m128
4932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4933 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4934 {
4935 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4936 (__v4sf) __W,
4937 (__mmask8) __U);
4938 }
4939
4940 extern __inline __m128
4941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4942 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4943 {
4944 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4945 (__v4sf)
4946 _mm_setzero_ps (),
4947 (__mmask8) __U);
4948 }
4949
4950 extern __inline __m256i
4951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4952 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4953 {
4954 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4955 (__v8si) __W,
4956 (__mmask8) __U);
4957 }
4958
4959 extern __inline __m256i
4960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4961 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4962 {
4963 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4964 (__v8si)
4965 _mm256_setzero_si256 (),
4966 (__mmask8) __U);
4967 }
4968
4969 extern __inline __m128i
4970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4971 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
4972 {
4973 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4974 (__v4si) __W,
4975 (__mmask8) __U);
4976 }
4977
4978 extern __inline __m128i
4979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4980 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
4981 {
4982 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
4983 (__v4si)
4984 _mm_setzero_si128 (),
4985 (__mmask8) __U);
4986 }
4987
4988 extern __inline __m256i
4989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4990 _mm256_cvtps_epu32 (__m256 __A)
4991 {
4992 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
4993 (__v8si)
4994 _mm256_setzero_si256 (),
4995 (__mmask8) -1);
4996 }
4997
4998 extern __inline __m256i
4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5001 {
5002 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5003 (__v8si) __W,
5004 (__mmask8) __U);
5005 }
5006
5007 extern __inline __m256i
5008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5009 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5010 {
5011 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5012 (__v8si)
5013 _mm256_setzero_si256 (),
5014 (__mmask8) __U);
5015 }
5016
5017 extern __inline __m128i
5018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5019 _mm_cvtps_epu32 (__m128 __A)
5020 {
5021 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5022 (__v4si)
5023 _mm_setzero_si128 (),
5024 (__mmask8) -1);
5025 }
5026
5027 extern __inline __m128i
5028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5029 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5030 {
5031 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5032 (__v4si) __W,
5033 (__mmask8) __U);
5034 }
5035
5036 extern __inline __m128i
5037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5038 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5039 {
5040 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5041 (__v4si)
5042 _mm_setzero_si128 (),
5043 (__mmask8) __U);
5044 }
5045
5046 extern __inline __m256d
5047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5048 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5049 {
5050 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5051 (__v4df) __W,
5052 (__mmask8) __U);
5053 }
5054
5055 extern __inline __m256d
5056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5057 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5058 {
5059 return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5060 (__v4df)
5061 _mm256_setzero_pd (),
5062 (__mmask8) __U);
5063 }
5064
5065 extern __inline __m128d
5066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5067 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5068 {
5069 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5070 (__v2df) __W,
5071 (__mmask8) __U);
5072 }
5073
5074 extern __inline __m128d
5075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5076 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5077 {
5078 return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5079 (__v2df)
5080 _mm_setzero_pd (),
5081 (__mmask8) __U);
5082 }
5083
5084 extern __inline __m256
5085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5086 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5087 {
5088 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5089 (__v8sf) __W,
5090 (__mmask8) __U);
5091 }
5092
5093 extern __inline __m256
5094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5095 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5096 {
5097 return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5098 (__v8sf)
5099 _mm256_setzero_ps (),
5100 (__mmask8) __U);
5101 }
5102
5103 extern __inline __m128
5104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5106 {
5107 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5108 (__v4sf) __W,
5109 (__mmask8) __U);
5110 }
5111
5112 extern __inline __m128
5113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5114 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5115 {
5116 return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5117 (__v4sf)
5118 _mm_setzero_ps (),
5119 (__mmask8) __U);
5120 }
5121
5122 extern __inline __m256
5123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5124 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5125 {
5126 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5127 (__v8sf) __W,
5128 (__mmask8) __U);
5129 }
5130
5131 extern __inline __m256
5132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5134 {
5135 return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5136 (__v8sf)
5137 _mm256_setzero_ps (),
5138 (__mmask8) __U);
5139 }
5140
5141 extern __inline __m128
5142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5143 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5144 {
5145 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5146 (__v4sf) __W,
5147 (__mmask8) __U);
5148 }
5149
5150 extern __inline __m128
5151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5152 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5153 {
5154 return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5155 (__v4sf)
5156 _mm_setzero_ps (),
5157 (__mmask8) __U);
5158 }
5159
5160 extern __inline __m128i
5161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5162 _mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5163 __m128i __B)
5164 {
5165 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5166 (__v4si) __B,
5167 (__v4si) __W,
5168 (__mmask8) __U);
5169 }
5170
5171 extern __inline __m128i
5172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5173 _mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5174 {
5175 return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5176 (__v4si) __B,
5177 (__v4si)
5178 _mm_setzero_si128 (),
5179 (__mmask8) __U);
5180 }
5181
5182 extern __inline __m256i
5183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5184 _mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5185 __m256i __B)
5186 {
5187 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5188 (__v8si) __B,
5189 (__v8si) __W,
5190 (__mmask8) __U);
5191 }
5192
5193 extern __inline __m256i
5194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5195 _mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5196 {
5197 return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5198 (__v8si) __B,
5199 (__v8si)
5200 _mm256_setzero_si256 (),
5201 (__mmask8) __U);
5202 }
5203
5204 extern __inline __m128i
5205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5206 _mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5207 __m128i __B)
5208 {
5209 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5210 (__v2di) __B,
5211 (__v2di) __W,
5212 (__mmask8) __U);
5213 }
5214
5215 extern __inline __m128i
5216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5217 _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5218 {
5219 return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5220 (__v2di) __B,
5221 (__v2di)
5222 _mm_setzero_di (),
5223 (__mmask8) __U);
5224 }
5225
5226 extern __inline __m256i
5227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5228 _mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5229 __m256i __B)
5230 {
5231 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5232 (__v4di) __B,
5233 (__v4di) __W,
5234 (__mmask8) __U);
5235 }
5236
5237 extern __inline __m256i
5238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5239 _mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5240 {
5241 return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5242 (__v4di) __B,
5243 (__v4di)
5244 _mm256_setzero_si256 (),
5245 (__mmask8) __U);
5246 }
5247
5248 extern __inline __m128i
5249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5250 _mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5251 __m128i __B)
5252 {
5253 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5254 (__v4si) __B,
5255 (__v4si) __W,
5256 (__mmask8) __U);
5257 }
5258
5259 extern __inline __m128i
5260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5261 _mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5262 {
5263 return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5264 (__v4si) __B,
5265 (__v4si)
5266 _mm_setzero_si128 (),
5267 (__mmask8) __U);
5268 }
5269
5270 extern __inline __m256i
5271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5272 _mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5273 __m256i __B)
5274 {
5275 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5276 (__v8si) __B,
5277 (__v8si) __W,
5278 (__mmask8) __U);
5279 }
5280
5281 extern __inline __m256i
5282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5283 _mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5284 {
5285 return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5286 (__v8si) __B,
5287 (__v8si)
5288 _mm256_setzero_si256 (),
5289 (__mmask8) __U);
5290 }
5291
5292 extern __inline __m128i
5293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5294 _mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5295 __m128i __B)
5296 {
5297 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5298 (__v2di) __B,
5299 (__v2di) __W,
5300 (__mmask8) __U);
5301 }
5302
5303 extern __inline __m128i
5304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5305 _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5306 {
5307 return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5308 (__v2di) __B,
5309 (__v2di)
5310 _mm_setzero_di (),
5311 (__mmask8) __U);
5312 }
5313
5314 extern __inline __m256i
5315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5316 _mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5317 __m256i __B)
5318 {
5319 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5320 (__v4di) __B,
5321 (__v4di) __W,
5322 (__mmask8) __U);
5323 }
5324
5325 extern __inline __m256i
5326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5327 _mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5328 {
5329 return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5330 (__v4di) __B,
5331 (__v4di)
5332 _mm256_setzero_si256 (),
5333 (__mmask8) __U);
5334 }
5335
5336 extern __inline __mmask8
5337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5338 _mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5339 {
5340 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5341 (__v4si) __B, 0,
5342 (__mmask8) -1);
5343 }
5344
5345 extern __inline __mmask8
5346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5347 _mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5348 {
5349 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5350 (__v4si) __B,
5351 (__mmask8) -1);
5352 }
5353
5354 extern __inline __mmask8
5355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5356 _mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5357 {
5358 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5359 (__v4si) __B, 0, __U);
5360 }
5361
5362 extern __inline __mmask8
5363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5364 _mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5365 {
5366 return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5367 (__v4si) __B, __U);
5368 }
5369
5370 extern __inline __mmask8
5371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5372 _mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5373 {
5374 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5375 (__v8si) __B, 0,
5376 (__mmask8) -1);
5377 }
5378
5379 extern __inline __mmask8
5380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381 _mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5382 {
5383 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5384 (__v8si) __B,
5385 (__mmask8) -1);
5386 }
5387
5388 extern __inline __mmask8
5389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5390 _mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5391 {
5392 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5393 (__v8si) __B, 0, __U);
5394 }
5395
5396 extern __inline __mmask8
5397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5398 _mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5399 {
5400 return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5401 (__v8si) __B, __U);
5402 }
5403
5404 extern __inline __mmask8
5405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5406 _mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5407 {
5408 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5409 (__v2di) __B, 0,
5410 (__mmask8) -1);
5411 }
5412
5413 extern __inline __mmask8
5414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5415 _mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5416 {
5417 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5418 (__v2di) __B,
5419 (__mmask8) -1);
5420 }
5421
5422 extern __inline __mmask8
5423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5424 _mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5425 {
5426 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5427 (__v2di) __B, 0, __U);
5428 }
5429
5430 extern __inline __mmask8
5431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5432 _mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5433 {
5434 return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5435 (__v2di) __B, __U);
5436 }
5437
5438 extern __inline __mmask8
5439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5440 _mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5441 {
5442 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5443 (__v4di) __B, 0,
5444 (__mmask8) -1);
5445 }
5446
5447 extern __inline __mmask8
5448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5449 _mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5450 {
5451 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5452 (__v4di) __B,
5453 (__mmask8) -1);
5454 }
5455
5456 extern __inline __mmask8
5457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5458 _mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5459 {
5460 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5461 (__v4di) __B, 0, __U);
5462 }
5463
5464 extern __inline __mmask8
5465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5466 _mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5467 {
5468 return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5469 (__v4di) __B, __U);
5470 }
5471
5472 extern __inline __mmask8
5473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5474 _mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5475 {
5476 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5477 (__v4si) __B, 6,
5478 (__mmask8) -1);
5479 }
5480
5481 extern __inline __mmask8
5482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5483 _mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5484 {
5485 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5486 (__v4si) __B,
5487 (__mmask8) -1);
5488 }
5489
5490 extern __inline __mmask8
5491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5492 _mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5493 {
5494 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5495 (__v4si) __B, 6, __U);
5496 }
5497
5498 extern __inline __mmask8
5499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5500 _mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5501 {
5502 return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5503 (__v4si) __B, __U);
5504 }
5505
5506 extern __inline __mmask8
5507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5508 _mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5509 {
5510 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5511 (__v8si) __B, 6,
5512 (__mmask8) -1);
5513 }
5514
5515 extern __inline __mmask8
5516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5517 _mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5518 {
5519 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5520 (__v8si) __B,
5521 (__mmask8) -1);
5522 }
5523
5524 extern __inline __mmask8
5525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5526 _mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5527 {
5528 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5529 (__v8si) __B, 6, __U);
5530 }
5531
5532 extern __inline __mmask8
5533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5534 _mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5535 {
5536 return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5537 (__v8si) __B, __U);
5538 }
5539
5540 extern __inline __mmask8
5541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5542 _mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5543 {
5544 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5545 (__v2di) __B, 6,
5546 (__mmask8) -1);
5547 }
5548
5549 extern __inline __mmask8
5550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5551 _mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5552 {
5553 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5554 (__v2di) __B,
5555 (__mmask8) -1);
5556 }
5557
5558 extern __inline __mmask8
5559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5560 _mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5561 {
5562 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5563 (__v2di) __B, 6, __U);
5564 }
5565
5566 extern __inline __mmask8
5567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5568 _mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5569 {
5570 return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5571 (__v2di) __B, __U);
5572 }
5573
5574 extern __inline __mmask8
5575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5576 _mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5577 {
5578 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5579 (__v4di) __B, 6,
5580 (__mmask8) -1);
5581 }
5582
5583 extern __inline __mmask8
5584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5585 _mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5586 {
5587 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5588 (__v4di) __B,
5589 (__mmask8) -1);
5590 }
5591
5592 extern __inline __mmask8
5593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5594 _mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5595 {
5596 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5597 (__v4di) __B, 6, __U);
5598 }
5599
5600 extern __inline __mmask8
5601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602 _mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5603 {
5604 return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5605 (__v4di) __B, __U);
5606 }
5607
5608 extern __inline __mmask8
5609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5610 _mm_test_epi32_mask (__m128i __A, __m128i __B)
5611 {
5612 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5613 (__v4si) __B,
5614 (__mmask8) -1);
5615 }
5616
5617 extern __inline __mmask8
5618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5619 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5620 {
5621 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5622 (__v4si) __B, __U);
5623 }
5624
5625 extern __inline __mmask8
5626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5627 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
5628 {
5629 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5630 (__v8si) __B,
5631 (__mmask8) -1);
5632 }
5633
5634 extern __inline __mmask8
5635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5636 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5637 {
5638 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5639 (__v8si) __B, __U);
5640 }
5641
5642 extern __inline __mmask8
5643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5644 _mm_test_epi64_mask (__m128i __A, __m128i __B)
5645 {
5646 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5647 (__v2di) __B,
5648 (__mmask8) -1);
5649 }
5650
5651 extern __inline __mmask8
5652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5653 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5654 {
5655 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5656 (__v2di) __B, __U);
5657 }
5658
5659 extern __inline __mmask8
5660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5661 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
5662 {
5663 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5664 (__v4di) __B,
5665 (__mmask8) -1);
5666 }
5667
5668 extern __inline __mmask8
5669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5670 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5671 {
5672 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5673 (__v4di) __B, __U);
5674 }
5675
5676 extern __inline __mmask8
5677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5678 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
5679 {
5680 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5681 (__v4si) __B,
5682 (__mmask8) -1);
5683 }
5684
5685 extern __inline __mmask8
5686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5687 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5688 {
5689 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5690 (__v4si) __B, __U);
5691 }
5692
5693 extern __inline __mmask8
5694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5695 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5696 {
5697 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5698 (__v8si) __B,
5699 (__mmask8) -1);
5700 }
5701
5702 extern __inline __mmask8
5703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5704 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5705 {
5706 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5707 (__v8si) __B, __U);
5708 }
5709
5710 extern __inline __mmask8
5711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5712 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
5713 {
5714 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5715 (__v2di) __B,
5716 (__mmask8) -1);
5717 }
5718
5719 extern __inline __mmask8
5720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5721 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5722 {
5723 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5724 (__v2di) __B, __U);
5725 }
5726
5727 extern __inline __mmask8
5728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5729 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5730 {
5731 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5732 (__v4di) __B,
5733 (__mmask8) -1);
5734 }
5735
5736 extern __inline __mmask8
5737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5738 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5739 {
5740 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5741 (__v4di) __B, __U);
5742 }
5743
5744 extern __inline __m256d
5745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5747 {
5748 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5749 (__v4df) __W,
5750 (__mmask8) __U);
5751 }
5752
5753 extern __inline __m256d
5754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5755 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5756 {
5757 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5758 (__v4df)
5759 _mm256_setzero_pd (),
5760 (__mmask8) __U);
5761 }
5762
5763 extern __inline void
5764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5765 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5766 {
5767 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5768 (__v4df) __A,
5769 (__mmask8) __U);
5770 }
5771
5772 extern __inline __m128d
5773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5774 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5775 {
5776 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5777 (__v2df) __W,
5778 (__mmask8) __U);
5779 }
5780
5781 extern __inline __m128d
5782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5783 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5784 {
5785 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5786 (__v2df)
5787 _mm_setzero_pd (),
5788 (__mmask8) __U);
5789 }
5790
5791 extern __inline void
5792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5793 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5794 {
5795 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5796 (__v2df) __A,
5797 (__mmask8) __U);
5798 }
5799
5800 extern __inline __m256
5801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5802 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5803 {
5804 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5805 (__v8sf) __W,
5806 (__mmask8) __U);
5807 }
5808
5809 extern __inline __m256
5810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5811 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5812 {
5813 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5814 (__v8sf)
5815 _mm256_setzero_ps (),
5816 (__mmask8) __U);
5817 }
5818
5819 extern __inline void
5820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5821 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5822 {
5823 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5824 (__v8sf) __A,
5825 (__mmask8) __U);
5826 }
5827
5828 extern __inline __m128
5829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5830 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5831 {
5832 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5833 (__v4sf) __W,
5834 (__mmask8) __U);
5835 }
5836
5837 extern __inline __m128
5838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5839 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5840 {
5841 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5842 (__v4sf)
5843 _mm_setzero_ps (),
5844 (__mmask8) __U);
5845 }
5846
5847 extern __inline void
5848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5849 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5850 {
5851 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5852 (__v4sf) __A,
5853 (__mmask8) __U);
5854 }
5855
5856 extern __inline __m256i
5857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5858 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5859 {
5860 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5861 (__v4di) __W,
5862 (__mmask8) __U);
5863 }
5864
5865 extern __inline __m256i
5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5867 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5868 {
5869 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5870 (__v4di)
5871 _mm256_setzero_si256 (),
5872 (__mmask8) __U);
5873 }
5874
5875 extern __inline void
5876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5878 {
5879 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5880 (__v4di) __A,
5881 (__mmask8) __U);
5882 }
5883
5884 extern __inline __m128i
5885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5886 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5887 {
5888 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5889 (__v2di) __W,
5890 (__mmask8) __U);
5891 }
5892
5893 extern __inline __m128i
5894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5895 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5896 {
5897 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5898 (__v2di)
5899 _mm_setzero_di (),
5900 (__mmask8) __U);
5901 }
5902
5903 extern __inline void
5904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5905 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5906 {
5907 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5908 (__v2di) __A,
5909 (__mmask8) __U);
5910 }
5911
5912 extern __inline __m256i
5913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5914 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5915 {
5916 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5917 (__v8si) __W,
5918 (__mmask8) __U);
5919 }
5920
5921 extern __inline __m256i
5922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5923 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5924 {
5925 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5926 (__v8si)
5927 _mm256_setzero_si256 (),
5928 (__mmask8) __U);
5929 }
5930
5931 extern __inline void
5932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5934 {
5935 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5936 (__v8si) __A,
5937 (__mmask8) __U);
5938 }
5939
5940 extern __inline __m128i
5941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5942 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5943 {
5944 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5945 (__v4si) __W,
5946 (__mmask8) __U);
5947 }
5948
5949 extern __inline __m128i
5950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5951 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5952 {
5953 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5954 (__v4si)
5955 _mm_setzero_si128 (),
5956 (__mmask8) __U);
5957 }
5958
5959 extern __inline void
5960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5961 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5962 {
5963 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5964 (__v4si) __A,
5965 (__mmask8) __U);
5966 }
5967
5968 extern __inline __m256d
5969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5970 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
5971 {
5972 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
5973 (__v4df) __W,
5974 (__mmask8) __U);
5975 }
5976
5977 extern __inline __m256d
5978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5979 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
5980 {
5981 return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
5982 (__v4df)
5983 _mm256_setzero_pd (),
5984 (__mmask8) __U);
5985 }
5986
5987 extern __inline __m256d
5988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5989 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5990 {
5991 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
5992 (__v4df) __W,
5993 (__mmask8)
5994 __U);
5995 }
5996
5997 extern __inline __m256d
5998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5999 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6000 {
6001 return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6002 (__v4df)
6003 _mm256_setzero_pd (),
6004 (__mmask8)
6005 __U);
6006 }
6007
6008 extern __inline __m128d
6009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6010 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6011 {
6012 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6013 (__v2df) __W,
6014 (__mmask8) __U);
6015 }
6016
6017 extern __inline __m128d
6018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6019 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6020 {
6021 return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6022 (__v2df)
6023 _mm_setzero_pd (),
6024 (__mmask8) __U);
6025 }
6026
6027 extern __inline __m128d
6028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6029 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6030 {
6031 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6032 (__v2df) __W,
6033 (__mmask8)
6034 __U);
6035 }
6036
6037 extern __inline __m128d
6038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6040 {
6041 return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6042 (__v2df)
6043 _mm_setzero_pd (),
6044 (__mmask8)
6045 __U);
6046 }
6047
6048 extern __inline __m256
6049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6051 {
6052 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6053 (__v8sf) __W,
6054 (__mmask8) __U);
6055 }
6056
6057 extern __inline __m256
6058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6059 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6060 {
6061 return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6062 (__v8sf)
6063 _mm256_setzero_ps (),
6064 (__mmask8) __U);
6065 }
6066
6067 extern __inline __m256
6068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6069 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6070 {
6071 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6072 (__v8sf) __W,
6073 (__mmask8) __U);
6074 }
6075
6076 extern __inline __m256
6077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6078 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6079 {
6080 return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6081 (__v8sf)
6082 _mm256_setzero_ps (),
6083 (__mmask8)
6084 __U);
6085 }
6086
6087 extern __inline __m128
6088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6089 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6090 {
6091 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6092 (__v4sf) __W,
6093 (__mmask8) __U);
6094 }
6095
6096 extern __inline __m128
6097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6098 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6099 {
6100 return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6101 (__v4sf)
6102 _mm_setzero_ps (),
6103 (__mmask8) __U);
6104 }
6105
6106 extern __inline __m128
6107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6108 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6109 {
6110 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6111 (__v4sf) __W,
6112 (__mmask8) __U);
6113 }
6114
6115 extern __inline __m128
6116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6117 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6118 {
6119 return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6120 (__v4sf)
6121 _mm_setzero_ps (),
6122 (__mmask8)
6123 __U);
6124 }
6125
6126 extern __inline __m256i
6127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6128 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6129 {
6130 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6131 (__v4di) __W,
6132 (__mmask8) __U);
6133 }
6134
6135 extern __inline __m256i
6136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6137 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6138 {
6139 return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6140 (__v4di)
6141 _mm256_setzero_si256 (),
6142 (__mmask8) __U);
6143 }
6144
6145 extern __inline __m256i
6146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6147 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6148 void const *__P)
6149 {
6150 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6151 (__v4di) __W,
6152 (__mmask8)
6153 __U);
6154 }
6155
6156 extern __inline __m256i
6157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6158 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6159 {
6160 return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6161 (__v4di)
6162 _mm256_setzero_si256 (),
6163 (__mmask8)
6164 __U);
6165 }
6166
6167 extern __inline __m128i
6168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6169 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6170 {
6171 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6172 (__v2di) __W,
6173 (__mmask8) __U);
6174 }
6175
6176 extern __inline __m128i
6177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6178 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6179 {
6180 return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6181 (__v2di)
6182 _mm_setzero_si128 (),
6183 (__mmask8) __U);
6184 }
6185
6186 extern __inline __m128i
6187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6188 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6189 {
6190 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6191 (__v2di) __W,
6192 (__mmask8)
6193 __U);
6194 }
6195
6196 extern __inline __m128i
6197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6198 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6199 {
6200 return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6201 (__v2di)
6202 _mm_setzero_si128 (),
6203 (__mmask8)
6204 __U);
6205 }
6206
6207 extern __inline __m256i
6208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6209 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6210 {
6211 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6212 (__v8si) __W,
6213 (__mmask8) __U);
6214 }
6215
6216 extern __inline __m256i
6217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6218 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6219 {
6220 return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6221 (__v8si)
6222 _mm256_setzero_si256 (),
6223 (__mmask8) __U);
6224 }
6225
6226 extern __inline __m256i
6227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6228 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6229 void const *__P)
6230 {
6231 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6232 (__v8si) __W,
6233 (__mmask8)
6234 __U);
6235 }
6236
6237 extern __inline __m256i
6238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6239 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6240 {
6241 return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6242 (__v8si)
6243 _mm256_setzero_si256 (),
6244 (__mmask8)
6245 __U);
6246 }
6247
6248 extern __inline __m128i
6249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6250 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6251 {
6252 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6253 (__v4si) __W,
6254 (__mmask8) __U);
6255 }
6256
6257 extern __inline __m128i
6258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6259 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6260 {
6261 return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6262 (__v4si)
6263 _mm_setzero_si128 (),
6264 (__mmask8) __U);
6265 }
6266
6267 extern __inline __m128i
6268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6269 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6270 {
6271 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6272 (__v4si) __W,
6273 (__mmask8)
6274 __U);
6275 }
6276
6277 extern __inline __m128i
6278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6279 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6280 {
6281 return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6282 (__v4si)
6283 _mm_setzero_si128 (),
6284 (__mmask8)
6285 __U);
6286 }
6287
6288 extern __inline __m256d
6289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6291 {
6292 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6293 /* idx */ ,
6294 (__v4df) __A,
6295 (__v4df) __B,
6296 (__mmask8) -
6297 1);
6298 }
6299
6300 extern __inline __m256d
6301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6302 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6303 __m256d __B)
6304 {
6305 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6306 /* idx */ ,
6307 (__v4df) __A,
6308 (__v4df) __B,
6309 (__mmask8)
6310 __U);
6311 }
6312
6313 extern __inline __m256d
6314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6315 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6316 __m256d __B)
6317 {
6318 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6319 (__v4di) __I
6320 /* idx */ ,
6321 (__v4df) __B,
6322 (__mmask8)
6323 __U);
6324 }
6325
6326 extern __inline __m256d
6327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6328 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6329 __m256d __B)
6330 {
6331 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6332 /* idx */ ,
6333 (__v4df) __A,
6334 (__v4df) __B,
6335 (__mmask8)
6336 __U);
6337 }
6338
6339 extern __inline __m256
6340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6341 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6342 {
6343 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6344 /* idx */ ,
6345 (__v8sf) __A,
6346 (__v8sf) __B,
6347 (__mmask8) -1);
6348 }
6349
6350 extern __inline __m256
6351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6352 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6353 __m256 __B)
6354 {
6355 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6356 /* idx */ ,
6357 (__v8sf) __A,
6358 (__v8sf) __B,
6359 (__mmask8) __U);
6360 }
6361
6362 extern __inline __m256
6363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6364 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6365 __m256 __B)
6366 {
6367 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6368 (__v8si) __I
6369 /* idx */ ,
6370 (__v8sf) __B,
6371 (__mmask8) __U);
6372 }
6373
6374 extern __inline __m256
6375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6376 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6377 __m256 __B)
6378 {
6379 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6380 /* idx */ ,
6381 (__v8sf) __A,
6382 (__v8sf) __B,
6383 (__mmask8)
6384 __U);
6385 }
6386
6387 extern __inline __m128i
6388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6390 {
6391 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6392 /* idx */ ,
6393 (__v2di) __A,
6394 (__v2di) __B,
6395 (__mmask8) -1);
6396 }
6397
6398 extern __inline __m128i
6399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6400 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6401 __m128i __B)
6402 {
6403 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6404 /* idx */ ,
6405 (__v2di) __A,
6406 (__v2di) __B,
6407 (__mmask8) __U);
6408 }
6409
6410 extern __inline __m128i
6411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6413 __m128i __B)
6414 {
6415 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6416 (__v2di) __I
6417 /* idx */ ,
6418 (__v2di) __B,
6419 (__mmask8) __U);
6420 }
6421
6422 extern __inline __m128i
6423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6424 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6425 __m128i __B)
6426 {
6427 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6428 /* idx */ ,
6429 (__v2di) __A,
6430 (__v2di) __B,
6431 (__mmask8)
6432 __U);
6433 }
6434
6435 extern __inline __m128i
6436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6437 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6438 {
6439 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6440 /* idx */ ,
6441 (__v4si) __A,
6442 (__v4si) __B,
6443 (__mmask8) -1);
6444 }
6445
6446 extern __inline __m128i
6447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6448 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6449 __m128i __B)
6450 {
6451 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6452 /* idx */ ,
6453 (__v4si) __A,
6454 (__v4si) __B,
6455 (__mmask8) __U);
6456 }
6457
6458 extern __inline __m128i
6459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6460 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6461 __m128i __B)
6462 {
6463 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6464 (__v4si) __I
6465 /* idx */ ,
6466 (__v4si) __B,
6467 (__mmask8) __U);
6468 }
6469
6470 extern __inline __m128i
6471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6472 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6473 __m128i __B)
6474 {
6475 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6476 /* idx */ ,
6477 (__v4si) __A,
6478 (__v4si) __B,
6479 (__mmask8)
6480 __U);
6481 }
6482
6483 extern __inline __m256i
6484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6485 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6486 {
6487 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6488 /* idx */ ,
6489 (__v4di) __A,
6490 (__v4di) __B,
6491 (__mmask8) -1);
6492 }
6493
6494 extern __inline __m256i
6495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6496 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6497 __m256i __B)
6498 {
6499 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6500 /* idx */ ,
6501 (__v4di) __A,
6502 (__v4di) __B,
6503 (__mmask8) __U);
6504 }
6505
6506 extern __inline __m256i
6507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6508 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6509 __mmask8 __U, __m256i __B)
6510 {
6511 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6512 (__v4di) __I
6513 /* idx */ ,
6514 (__v4di) __B,
6515 (__mmask8) __U);
6516 }
6517
6518 extern __inline __m256i
6519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6520 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6521 __m256i __I, __m256i __B)
6522 {
6523 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6524 /* idx */ ,
6525 (__v4di) __A,
6526 (__v4di) __B,
6527 (__mmask8)
6528 __U);
6529 }
6530
6531 extern __inline __m256i
6532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6533 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6534 {
6535 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6536 /* idx */ ,
6537 (__v8si) __A,
6538 (__v8si) __B,
6539 (__mmask8) -1);
6540 }
6541
6542 extern __inline __m256i
6543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6544 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6545 __m256i __B)
6546 {
6547 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6548 /* idx */ ,
6549 (__v8si) __A,
6550 (__v8si) __B,
6551 (__mmask8) __U);
6552 }
6553
6554 extern __inline __m256i
6555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6556 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6557 __mmask8 __U, __m256i __B)
6558 {
6559 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6560 (__v8si) __I
6561 /* idx */ ,
6562 (__v8si) __B,
6563 (__mmask8) __U);
6564 }
6565
6566 extern __inline __m256i
6567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6568 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6569 __m256i __I, __m256i __B)
6570 {
6571 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6572 /* idx */ ,
6573 (__v8si) __A,
6574 (__v8si) __B,
6575 (__mmask8)
6576 __U);
6577 }
6578
6579 extern __inline __m128d
6580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6582 {
6583 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6584 /* idx */ ,
6585 (__v2df) __A,
6586 (__v2df) __B,
6587 (__mmask8) -
6588 1);
6589 }
6590
6591 extern __inline __m128d
6592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6593 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6594 __m128d __B)
6595 {
6596 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6597 /* idx */ ,
6598 (__v2df) __A,
6599 (__v2df) __B,
6600 (__mmask8)
6601 __U);
6602 }
6603
6604 extern __inline __m128d
6605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6606 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6607 __m128d __B)
6608 {
6609 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6610 (__v2di) __I
6611 /* idx */ ,
6612 (__v2df) __B,
6613 (__mmask8)
6614 __U);
6615 }
6616
6617 extern __inline __m128d
6618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6619 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6620 __m128d __B)
6621 {
6622 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6623 /* idx */ ,
6624 (__v2df) __A,
6625 (__v2df) __B,
6626 (__mmask8)
6627 __U);
6628 }
6629
6630 extern __inline __m128
6631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6632 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6633 {
6634 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6635 /* idx */ ,
6636 (__v4sf) __A,
6637 (__v4sf) __B,
6638 (__mmask8) -1);
6639 }
6640
6641 extern __inline __m128
6642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6643 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6644 __m128 __B)
6645 {
6646 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6647 /* idx */ ,
6648 (__v4sf) __A,
6649 (__v4sf) __B,
6650 (__mmask8) __U);
6651 }
6652
6653 extern __inline __m128
6654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6655 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6656 __m128 __B)
6657 {
6658 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6659 (__v4si) __I
6660 /* idx */ ,
6661 (__v4sf) __B,
6662 (__mmask8) __U);
6663 }
6664
6665 extern __inline __m128
6666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6667 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6668 __m128 __B)
6669 {
6670 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6671 /* idx */ ,
6672 (__v4sf) __A,
6673 (__v4sf) __B,
6674 (__mmask8)
6675 __U);
6676 }
6677
6678 extern __inline __m128i
6679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680 _mm_srav_epi64 (__m128i __X, __m128i __Y)
6681 {
6682 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6683 (__v2di) __Y,
6684 (__v2di)
6685 _mm_setzero_di (),
6686 (__mmask8) -1);
6687 }
6688
6689 extern __inline __m128i
6690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6691 _mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6692 __m128i __Y)
6693 {
6694 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6695 (__v2di) __Y,
6696 (__v2di) __W,
6697 (__mmask8) __U);
6698 }
6699
6700 extern __inline __m128i
6701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6702 _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6703 {
6704 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6705 (__v2di) __Y,
6706 (__v2di)
6707 _mm_setzero_di (),
6708 (__mmask8) __U);
6709 }
6710
6711 extern __inline __m256i
6712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6713 _mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6714 __m256i __Y)
6715 {
6716 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6717 (__v8si) __Y,
6718 (__v8si) __W,
6719 (__mmask8) __U);
6720 }
6721
6722 extern __inline __m256i
6723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724 _mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6725 {
6726 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6727 (__v8si) __Y,
6728 (__v8si)
6729 _mm256_setzero_si256 (),
6730 (__mmask8) __U);
6731 }
6732
6733 extern __inline __m128i
6734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735 _mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6736 __m128i __Y)
6737 {
6738 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6739 (__v4si) __Y,
6740 (__v4si) __W,
6741 (__mmask8) __U);
6742 }
6743
6744 extern __inline __m128i
6745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746 _mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6747 {
6748 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6749 (__v4si) __Y,
6750 (__v4si)
6751 _mm_setzero_si128 (),
6752 (__mmask8) __U);
6753 }
6754
6755 extern __inline __m256i
6756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6757 _mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6758 __m256i __Y)
6759 {
6760 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6761 (__v4di) __Y,
6762 (__v4di) __W,
6763 (__mmask8) __U);
6764 }
6765
6766 extern __inline __m256i
6767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6768 _mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6769 {
6770 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6771 (__v4di) __Y,
6772 (__v4di)
6773 _mm256_setzero_si256 (),
6774 (__mmask8) __U);
6775 }
6776
6777 extern __inline __m128i
6778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779 _mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6780 __m128i __Y)
6781 {
6782 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6783 (__v2di) __Y,
6784 (__v2di) __W,
6785 (__mmask8) __U);
6786 }
6787
6788 extern __inline __m128i
6789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790 _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6791 {
6792 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6793 (__v2di) __Y,
6794 (__v2di)
6795 _mm_setzero_di (),
6796 (__mmask8) __U);
6797 }
6798
6799 extern __inline __m256i
6800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801 _mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6802 __m256i __Y)
6803 {
6804 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6805 (__v8si) __Y,
6806 (__v8si) __W,
6807 (__mmask8) __U);
6808 }
6809
6810 extern __inline __m256i
6811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812 _mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6813 {
6814 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6815 (__v8si) __Y,
6816 (__v8si)
6817 _mm256_setzero_si256 (),
6818 (__mmask8) __U);
6819 }
6820
6821 extern __inline __m128i
6822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6823 _mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6824 __m128i __Y)
6825 {
6826 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6827 (__v4si) __Y,
6828 (__v4si) __W,
6829 (__mmask8) __U);
6830 }
6831
6832 extern __inline __m128i
6833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834 _mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6835 {
6836 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6837 (__v4si) __Y,
6838 (__v4si)
6839 _mm_setzero_si128 (),
6840 (__mmask8) __U);
6841 }
6842
6843 extern __inline __m256i
6844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845 _mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6846 __m256i __Y)
6847 {
6848 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6849 (__v8si) __Y,
6850 (__v8si) __W,
6851 (__mmask8) __U);
6852 }
6853
6854 extern __inline __m256i
6855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856 _mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6857 {
6858 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6859 (__v8si) __Y,
6860 (__v8si)
6861 _mm256_setzero_si256 (),
6862 (__mmask8) __U);
6863 }
6864
6865 extern __inline __m128i
6866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867 _mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6868 __m128i __Y)
6869 {
6870 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6871 (__v4si) __Y,
6872 (__v4si) __W,
6873 (__mmask8) __U);
6874 }
6875
6876 extern __inline __m128i
6877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878 _mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6879 {
6880 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6881 (__v4si) __Y,
6882 (__v4si)
6883 _mm_setzero_si128 (),
6884 (__mmask8) __U);
6885 }
6886
6887 extern __inline __m256i
6888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889 _mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6890 __m256i __Y)
6891 {
6892 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6893 (__v4di) __Y,
6894 (__v4di) __W,
6895 (__mmask8) __U);
6896 }
6897
6898 extern __inline __m256i
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 _mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6901 {
6902 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6903 (__v4di) __Y,
6904 (__v4di)
6905 _mm256_setzero_si256 (),
6906 (__mmask8) __U);
6907 }
6908
6909 extern __inline __m128i
6910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911 _mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6912 __m128i __Y)
6913 {
6914 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6915 (__v2di) __Y,
6916 (__v2di) __W,
6917 (__mmask8) __U);
6918 }
6919
6920 extern __inline __m128i
6921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922 _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6923 {
6924 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6925 (__v2di) __Y,
6926 (__v2di)
6927 _mm_setzero_di (),
6928 (__mmask8) __U);
6929 }
6930
6931 extern __inline __m256i
6932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
6934 {
6935 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6936 (__v8si) __B,
6937 (__v8si)
6938 _mm256_setzero_si256 (),
6939 (__mmask8) -1);
6940 }
6941
6942 extern __inline __m256i
6943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6945 __m256i __B)
6946 {
6947 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6948 (__v8si) __B,
6949 (__v8si) __W,
6950 (__mmask8) __U);
6951 }
6952
6953 extern __inline __m256i
6954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6956 {
6957 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6958 (__v8si) __B,
6959 (__v8si)
6960 _mm256_setzero_si256 (),
6961 (__mmask8) __U);
6962 }
6963
6964 extern __inline __m128i
6965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966 _mm_rolv_epi32 (__m128i __A, __m128i __B)
6967 {
6968 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6969 (__v4si) __B,
6970 (__v4si)
6971 _mm_setzero_si128 (),
6972 (__mmask8) -1);
6973 }
6974
6975 extern __inline __m128i
6976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
6978 __m128i __B)
6979 {
6980 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6981 (__v4si) __B,
6982 (__v4si) __W,
6983 (__mmask8) __U);
6984 }
6985
6986 extern __inline __m128i
6987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
6989 {
6990 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
6991 (__v4si) __B,
6992 (__v4si)
6993 _mm_setzero_si128 (),
6994 (__mmask8) __U);
6995 }
6996
6997 extern __inline __m256i
6998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
7000 {
7001 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7002 (__v8si) __B,
7003 (__v8si)
7004 _mm256_setzero_si256 (),
7005 (__mmask8) -1);
7006 }
7007
7008 extern __inline __m256i
7009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7011 __m256i __B)
7012 {
7013 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7014 (__v8si) __B,
7015 (__v8si) __W,
7016 (__mmask8) __U);
7017 }
7018
7019 extern __inline __m256i
7020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7022 {
7023 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7024 (__v8si) __B,
7025 (__v8si)
7026 _mm256_setzero_si256 (),
7027 (__mmask8) __U);
7028 }
7029
7030 extern __inline __m128i
7031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032 _mm_rorv_epi32 (__m128i __A, __m128i __B)
7033 {
7034 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7035 (__v4si) __B,
7036 (__v4si)
7037 _mm_setzero_si128 (),
7038 (__mmask8) -1);
7039 }
7040
7041 extern __inline __m128i
7042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7044 __m128i __B)
7045 {
7046 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7047 (__v4si) __B,
7048 (__v4si) __W,
7049 (__mmask8) __U);
7050 }
7051
7052 extern __inline __m128i
7053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7055 {
7056 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7057 (__v4si) __B,
7058 (__v4si)
7059 _mm_setzero_si128 (),
7060 (__mmask8) __U);
7061 }
7062
7063 extern __inline __m256i
7064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
7066 {
7067 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7068 (__v4di) __B,
7069 (__v4di)
7070 _mm256_setzero_si256 (),
7071 (__mmask8) -1);
7072 }
7073
7074 extern __inline __m256i
7075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7077 __m256i __B)
7078 {
7079 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7080 (__v4di) __B,
7081 (__v4di) __W,
7082 (__mmask8) __U);
7083 }
7084
7085 extern __inline __m256i
7086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7088 {
7089 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7090 (__v4di) __B,
7091 (__v4di)
7092 _mm256_setzero_si256 (),
7093 (__mmask8) __U);
7094 }
7095
7096 extern __inline __m128i
7097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098 _mm_rolv_epi64 (__m128i __A, __m128i __B)
7099 {
7100 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7101 (__v2di) __B,
7102 (__v2di)
7103 _mm_setzero_di (),
7104 (__mmask8) -1);
7105 }
7106
7107 extern __inline __m128i
7108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7110 __m128i __B)
7111 {
7112 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7113 (__v2di) __B,
7114 (__v2di) __W,
7115 (__mmask8) __U);
7116 }
7117
7118 extern __inline __m128i
7119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7121 {
7122 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7123 (__v2di) __B,
7124 (__v2di)
7125 _mm_setzero_di (),
7126 (__mmask8) __U);
7127 }
7128
7129 extern __inline __m256i
7130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
7132 {
7133 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7134 (__v4di) __B,
7135 (__v4di)
7136 _mm256_setzero_si256 (),
7137 (__mmask8) -1);
7138 }
7139
7140 extern __inline __m256i
7141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7143 __m256i __B)
7144 {
7145 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7146 (__v4di) __B,
7147 (__v4di) __W,
7148 (__mmask8) __U);
7149 }
7150
7151 extern __inline __m256i
7152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7153 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7154 {
7155 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7156 (__v4di) __B,
7157 (__v4di)
7158 _mm256_setzero_si256 (),
7159 (__mmask8) __U);
7160 }
7161
7162 extern __inline __m128i
7163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164 _mm_rorv_epi64 (__m128i __A, __m128i __B)
7165 {
7166 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7167 (__v2di) __B,
7168 (__v2di)
7169 _mm_setzero_di (),
7170 (__mmask8) -1);
7171 }
7172
7173 extern __inline __m128i
7174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7176 __m128i __B)
7177 {
7178 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7179 (__v2di) __B,
7180 (__v2di) __W,
7181 (__mmask8) __U);
7182 }
7183
7184 extern __inline __m128i
7185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7187 {
7188 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7189 (__v2di) __B,
7190 (__v2di)
7191 _mm_setzero_di (),
7192 (__mmask8) __U);
7193 }
7194
7195 extern __inline __m256i
7196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7197 _mm256_srav_epi64 (__m256i __X, __m256i __Y)
7198 {
7199 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7200 (__v4di) __Y,
7201 (__v4di)
7202 _mm256_setzero_si256 (),
7203 (__mmask8) -1);
7204 }
7205
7206 extern __inline __m256i
7207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7208 _mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7209 __m256i __Y)
7210 {
7211 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7212 (__v4di) __Y,
7213 (__v4di) __W,
7214 (__mmask8) __U);
7215 }
7216
7217 extern __inline __m256i
7218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7220 {
7221 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7222 (__v4di) __Y,
7223 (__v4di)
7224 _mm256_setzero_si256 (),
7225 (__mmask8) __U);
7226 }
7227
7228 extern __inline __m256i
7229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7230 _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7231 __m256i __B)
7232 {
7233 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7234 (__v4di) __B,
7235 (__v4di) __W, __U);
7236 }
7237
7238 extern __inline __m256i
7239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7240 _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7241 {
7242 return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7243 (__v4di) __B,
7244 (__v4di)
7245 _mm256_setzero_pd (),
7246 __U);
7247 }
7248
7249 extern __inline __m128i
7250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7251 _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7252 __m128i __B)
7253 {
7254 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7255 (__v2di) __B,
7256 (__v2di) __W, __U);
7257 }
7258
7259 extern __inline __m128i
7260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7261 _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7262 {
7263 return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7264 (__v2di) __B,
7265 (__v2di)
7266 _mm_setzero_pd (),
7267 __U);
7268 }
7269
7270 extern __inline __m256i
7271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7272 _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7273 __m256i __B)
7274 {
7275 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7276 (__v4di) __B,
7277 (__v4di) __W, __U);
7278 }
7279
7280 extern __inline __m256i
7281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7282 _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7283 {
7284 return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7285 (__v4di) __B,
7286 (__v4di)
7287 _mm256_setzero_pd (),
7288 __U);
7289 }
7290
7291 extern __inline __m128i
7292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7293 _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7294 __m128i __B)
7295 {
7296 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7297 (__v2di) __B,
7298 (__v2di) __W, __U);
7299 }
7300
7301 extern __inline __m128i
7302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7303 _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7304 {
7305 return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7306 (__v2di) __B,
7307 (__v2di)
7308 _mm_setzero_pd (),
7309 __U);
7310 }
7311
7312 extern __inline __m256i
7313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7314 _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7315 __m256i __B)
7316 {
7317 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7318 (__v4di) __B,
7319 (__v4di) __W,
7320 (__mmask8) __U);
7321 }
7322
7323 extern __inline __m256i
7324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7325 _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7326 {
7327 return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7328 (__v4di) __B,
7329 (__v4di)
7330 _mm256_setzero_si256 (),
7331 (__mmask8) __U);
7332 }
7333
7334 extern __inline __m128i
7335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336 _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7337 {
7338 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7339 (__v2di) __B,
7340 (__v2di) __W,
7341 (__mmask8) __U);
7342 }
7343
7344 extern __inline __m128i
7345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7346 _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7347 {
7348 return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7349 (__v2di) __B,
7350 (__v2di)
7351 _mm_setzero_si128 (),
7352 (__mmask8) __U);
7353 }
7354
7355 extern __inline __m256i
7356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7357 _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7358 __m256i __B)
7359 {
7360 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7361 (__v4di) __B,
7362 (__v4di) __W,
7363 (__mmask8) __U);
7364 }
7365
7366 extern __inline __m256i
7367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7368 _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7369 {
7370 return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7371 (__v4di) __B,
7372 (__v4di)
7373 _mm256_setzero_si256 (),
7374 (__mmask8) __U);
7375 }
7376
7377 extern __inline __m128i
7378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7379 _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7380 __m128i __B)
7381 {
7382 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7383 (__v2di) __B,
7384 (__v2di) __W,
7385 (__mmask8) __U);
7386 }
7387
7388 extern __inline __m128i
7389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7390 _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7391 {
7392 return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7393 (__v2di) __B,
7394 (__v2di)
7395 _mm_setzero_si128 (),
7396 (__mmask8) __U);
7397 }
7398
7399 extern __inline __m256d
7400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7401 _mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7402 __m256d __B)
7403 {
7404 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7405 (__v4df) __B,
7406 (__v4df) __W,
7407 (__mmask8) __U);
7408 }
7409
7410 extern __inline __m256d
7411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412 _mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7413 {
7414 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7415 (__v4df) __B,
7416 (__v4df)
7417 _mm256_setzero_pd (),
7418 (__mmask8) __U);
7419 }
7420
7421 extern __inline __m256
7422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7423 _mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7424 {
7425 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7426 (__v8sf) __B,
7427 (__v8sf) __W,
7428 (__mmask8) __U);
7429 }
7430
7431 extern __inline __m256
7432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7433 _mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7434 {
7435 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7436 (__v8sf) __B,
7437 (__v8sf)
7438 _mm256_setzero_ps (),
7439 (__mmask8) __U);
7440 }
7441
7442 extern __inline __m128
7443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7444 _mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7445 {
7446 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7447 (__v4sf) __B,
7448 (__v4sf) __W,
7449 (__mmask8) __U);
7450 }
7451
7452 extern __inline __m128
7453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7454 _mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7455 {
7456 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7457 (__v4sf) __B,
7458 (__v4sf)
7459 _mm_setzero_ps (),
7460 (__mmask8) __U);
7461 }
7462
7463 extern __inline __m128d
7464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7465 _mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7466 {
7467 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7468 (__v2df) __B,
7469 (__v2df) __W,
7470 (__mmask8) __U);
7471 }
7472
7473 extern __inline __m128d
7474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7475 _mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7476 {
7477 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7478 (__v2df) __B,
7479 (__v2df)
7480 _mm_setzero_pd (),
7481 (__mmask8) __U);
7482 }
7483
7484 extern __inline __m256d
7485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7486 _mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7487 __m256d __B)
7488 {
7489 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7490 (__v4df) __B,
7491 (__v4df) __W,
7492 (__mmask8) __U);
7493 }
7494
7495 extern __inline __m256d
7496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7497 _mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7498 __m256d __B)
7499 {
7500 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7501 (__v4df) __B,
7502 (__v4df) __W,
7503 (__mmask8) __U);
7504 }
7505
7506 extern __inline __m256d
7507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508 _mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7509 {
7510 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7511 (__v4df) __B,
7512 (__v4df)
7513 _mm256_setzero_pd (),
7514 (__mmask8) __U);
7515 }
7516
7517 extern __inline __m256
7518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7519 _mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7520 {
7521 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7522 (__v8sf) __B,
7523 (__v8sf) __W,
7524 (__mmask8) __U);
7525 }
7526
7527 extern __inline __m256d
7528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7529 _mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7530 {
7531 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7532 (__v4df) __B,
7533 (__v4df)
7534 _mm256_setzero_pd (),
7535 (__mmask8) __U);
7536 }
7537
7538 extern __inline __m256
7539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7540 _mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7541 {
7542 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7543 (__v8sf) __B,
7544 (__v8sf) __W,
7545 (__mmask8) __U);
7546 }
7547
7548 extern __inline __m256
7549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7550 _mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7551 {
7552 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7553 (__v8sf) __B,
7554 (__v8sf)
7555 _mm256_setzero_ps (),
7556 (__mmask8) __U);
7557 }
7558
7559 extern __inline __m256
7560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7561 _mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7562 {
7563 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7564 (__v8sf) __B,
7565 (__v8sf)
7566 _mm256_setzero_ps (),
7567 (__mmask8) __U);
7568 }
7569
7570 extern __inline __m128
7571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7572 _mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7573 {
7574 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7575 (__v4sf) __B,
7576 (__v4sf) __W,
7577 (__mmask8) __U);
7578 }
7579
7580 extern __inline __m128
7581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7582 _mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7583 {
7584 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7585 (__v4sf) __B,
7586 (__v4sf) __W,
7587 (__mmask8) __U);
7588 }
7589
7590 extern __inline __m128
7591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7592 _mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7593 {
7594 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7595 (__v4sf) __B,
7596 (__v4sf)
7597 _mm_setzero_ps (),
7598 (__mmask8) __U);
7599 }
7600
7601 extern __inline __m128
7602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7603 _mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7604 {
7605 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7606 (__v4sf) __B,
7607 (__v4sf)
7608 _mm_setzero_ps (),
7609 (__mmask8) __U);
7610 }
7611
7612 extern __inline __m128
7613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7614 _mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7615 {
7616 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7617 (__v4sf) __B,
7618 (__v4sf) __W,
7619 (__mmask8) __U);
7620 }
7621
7622 extern __inline __m128
7623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7624 _mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7625 {
7626 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7627 (__v4sf) __B,
7628 (__v4sf)
7629 _mm_setzero_ps (),
7630 (__mmask8) __U);
7631 }
7632
7633 extern __inline __m128d
7634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7635 _mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7636 {
7637 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7638 (__v2df) __B,
7639 (__v2df) __W,
7640 (__mmask8) __U);
7641 }
7642
7643 extern __inline __m128d
7644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7645 _mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7646 {
7647 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7648 (__v2df) __B,
7649 (__v2df)
7650 _mm_setzero_pd (),
7651 (__mmask8) __U);
7652 }
7653
7654 extern __inline __m128d
7655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7656 _mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7657 {
7658 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7659 (__v2df) __B,
7660 (__v2df) __W,
7661 (__mmask8) __U);
7662 }
7663
7664 extern __inline __m128d
7665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7666 _mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7667 {
7668 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7669 (__v2df) __B,
7670 (__v2df)
7671 _mm_setzero_pd (),
7672 (__mmask8) __U);
7673 }
7674
7675 extern __inline __m128d
7676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7677 _mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7678 {
7679 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7680 (__v2df) __B,
7681 (__v2df) __W,
7682 (__mmask8) __U);
7683 }
7684
7685 extern __inline __m128d
7686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7687 _mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7688 {
7689 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7690 (__v2df) __B,
7691 (__v2df)
7692 _mm_setzero_pd (),
7693 (__mmask8) __U);
7694 }
7695
7696 extern __inline __m256
7697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7698 _mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7699 {
7700 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7701 (__v8sf) __B,
7702 (__v8sf) __W,
7703 (__mmask8) __U);
7704 }
7705
7706 extern __inline __m256
7707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7708 _mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7709 {
7710 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7711 (__v8sf) __B,
7712 (__v8sf)
7713 _mm256_setzero_ps (),
7714 (__mmask8) __U);
7715 }
7716
7717 extern __inline __m256d
7718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7719 _mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7720 __m256d __B)
7721 {
7722 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7723 (__v4df) __B,
7724 (__v4df) __W,
7725 (__mmask8) __U);
7726 }
7727
7728 extern __inline __m256d
7729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7730 _mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7731 {
7732 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7733 (__v4df) __B,
7734 (__v4df)
7735 _mm256_setzero_pd (),
7736 (__mmask8) __U);
7737 }
7738
7739 extern __inline __m256i
7740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7741 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7742 {
7743 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7744 (__v4di) __B,
7745 (__v4di)
7746 _mm256_setzero_si256 (),
7747 __M);
7748 }
7749
7750 extern __inline __m256i
7751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7752 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7753 __m256i __B)
7754 {
7755 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7756 (__v4di) __B,
7757 (__v4di) __W, __M);
7758 }
7759
7760 extern __inline __m256i
7761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7762 _mm256_min_epi64 (__m256i __A, __m256i __B)
7763 {
7764 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7765 (__v4di) __B,
7766 (__v4di)
7767 _mm256_setzero_si256 (),
7768 (__mmask8) -1);
7769 }
7770
7771 extern __inline __m256i
7772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7773 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7774 __m256i __B)
7775 {
7776 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7777 (__v4di) __B,
7778 (__v4di) __W, __M);
7779 }
7780
7781 extern __inline __m256i
7782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7783 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7784 {
7785 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7786 (__v4di) __B,
7787 (__v4di)
7788 _mm256_setzero_si256 (),
7789 __M);
7790 }
7791
7792 extern __inline __m256i
7793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7794 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7795 {
7796 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7797 (__v4di) __B,
7798 (__v4di)
7799 _mm256_setzero_si256 (),
7800 __M);
7801 }
7802
7803 extern __inline __m256i
7804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7805 _mm256_max_epi64 (__m256i __A, __m256i __B)
7806 {
7807 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7808 (__v4di) __B,
7809 (__v4di)
7810 _mm256_setzero_si256 (),
7811 (__mmask8) -1);
7812 }
7813
7814 extern __inline __m256i
7815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7816 _mm256_max_epu64 (__m256i __A, __m256i __B)
7817 {
7818 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7819 (__v4di) __B,
7820 (__v4di)
7821 _mm256_setzero_si256 (),
7822 (__mmask8) -1);
7823 }
7824
7825 extern __inline __m256i
7826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7827 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7828 __m256i __B)
7829 {
7830 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7831 (__v4di) __B,
7832 (__v4di) __W, __M);
7833 }
7834
7835 extern __inline __m256i
7836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7837 _mm256_min_epu64 (__m256i __A, __m256i __B)
7838 {
7839 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7840 (__v4di) __B,
7841 (__v4di)
7842 _mm256_setzero_si256 (),
7843 (__mmask8) -1);
7844 }
7845
7846 extern __inline __m256i
7847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7848 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7849 __m256i __B)
7850 {
7851 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7852 (__v4di) __B,
7853 (__v4di) __W, __M);
7854 }
7855
7856 extern __inline __m256i
7857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7858 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7859 {
7860 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7861 (__v4di) __B,
7862 (__v4di)
7863 _mm256_setzero_si256 (),
7864 __M);
7865 }
7866
7867 extern __inline __m256i
7868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7869 _mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7870 {
7871 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7872 (__v8si) __B,
7873 (__v8si)
7874 _mm256_setzero_si256 (),
7875 __M);
7876 }
7877
7878 extern __inline __m256i
7879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7880 _mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7881 __m256i __B)
7882 {
7883 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7884 (__v8si) __B,
7885 (__v8si) __W, __M);
7886 }
7887
7888 extern __inline __m256i
7889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890 _mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7891 {
7892 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7893 (__v8si) __B,
7894 (__v8si)
7895 _mm256_setzero_si256 (),
7896 __M);
7897 }
7898
7899 extern __inline __m256i
7900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7901 _mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7902 __m256i __B)
7903 {
7904 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7905 (__v8si) __B,
7906 (__v8si) __W, __M);
7907 }
7908
7909 extern __inline __m256i
7910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7911 _mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7912 {
7913 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7914 (__v8si) __B,
7915 (__v8si)
7916 _mm256_setzero_si256 (),
7917 __M);
7918 }
7919
7920 extern __inline __m256i
7921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7922 _mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7923 __m256i __B)
7924 {
7925 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7926 (__v8si) __B,
7927 (__v8si) __W, __M);
7928 }
7929
7930 extern __inline __m256i
7931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7932 _mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7933 {
7934 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7935 (__v8si) __B,
7936 (__v8si)
7937 _mm256_setzero_si256 (),
7938 __M);
7939 }
7940
7941 extern __inline __m256i
7942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7943 _mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7944 __m256i __B)
7945 {
7946 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7947 (__v8si) __B,
7948 (__v8si) __W, __M);
7949 }
7950
7951 extern __inline __m128i
7952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7953 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7954 {
7955 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7956 (__v2di) __B,
7957 (__v2di)
7958 _mm_setzero_si128 (),
7959 __M);
7960 }
7961
7962 extern __inline __m128i
7963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7964 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7965 __m128i __B)
7966 {
7967 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
7968 (__v2di) __B,
7969 (__v2di) __W, __M);
7970 }
7971
7972 extern __inline __m128i
7973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7974 _mm_min_epi64 (__m128i __A, __m128i __B)
7975 {
7976 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7977 (__v2di) __B,
7978 (__v2di)
7979 _mm_setzero_di (),
7980 (__mmask8) -1);
7981 }
7982
7983 extern __inline __m128i
7984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7985 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
7986 __m128i __B)
7987 {
7988 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7989 (__v2di) __B,
7990 (__v2di) __W, __M);
7991 }
7992
7993 extern __inline __m128i
7994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7995 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
7996 {
7997 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
7998 (__v2di) __B,
7999 (__v2di)
8000 _mm_setzero_si128 (),
8001 __M);
8002 }
8003
8004 extern __inline __m128i
8005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8006 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8007 {
8008 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8009 (__v2di) __B,
8010 (__v2di)
8011 _mm_setzero_si128 (),
8012 __M);
8013 }
8014
8015 extern __inline __m128i
8016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8017 _mm_max_epi64 (__m128i __A, __m128i __B)
8018 {
8019 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8020 (__v2di) __B,
8021 (__v2di)
8022 _mm_setzero_di (),
8023 (__mmask8) -1);
8024 }
8025
8026 extern __inline __m128i
8027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8028 _mm_max_epu64 (__m128i __A, __m128i __B)
8029 {
8030 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8031 (__v2di) __B,
8032 (__v2di)
8033 _mm_setzero_di (),
8034 (__mmask8) -1);
8035 }
8036
8037 extern __inline __m128i
8038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8039 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8040 __m128i __B)
8041 {
8042 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8043 (__v2di) __B,
8044 (__v2di) __W, __M);
8045 }
8046
8047 extern __inline __m128i
8048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8049 _mm_min_epu64 (__m128i __A, __m128i __B)
8050 {
8051 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8052 (__v2di) __B,
8053 (__v2di)
8054 _mm_setzero_di (),
8055 (__mmask8) -1);
8056 }
8057
8058 extern __inline __m128i
8059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8060 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8061 __m128i __B)
8062 {
8063 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8064 (__v2di) __B,
8065 (__v2di) __W, __M);
8066 }
8067
8068 extern __inline __m128i
8069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8070 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8071 {
8072 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8073 (__v2di) __B,
8074 (__v2di)
8075 _mm_setzero_si128 (),
8076 __M);
8077 }
8078
8079 extern __inline __m128i
8080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8081 _mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8082 {
8083 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8084 (__v4si) __B,
8085 (__v4si)
8086 _mm_setzero_si128 (),
8087 __M);
8088 }
8089
8090 extern __inline __m128i
8091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8092 _mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8093 __m128i __B)
8094 {
8095 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8096 (__v4si) __B,
8097 (__v4si) __W, __M);
8098 }
8099
8100 extern __inline __m128i
8101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102 _mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8103 {
8104 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8105 (__v4si) __B,
8106 (__v4si)
8107 _mm_setzero_si128 (),
8108 __M);
8109 }
8110
8111 extern __inline __m128i
8112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8113 _mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8114 __m128i __B)
8115 {
8116 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8117 (__v4si) __B,
8118 (__v4si) __W, __M);
8119 }
8120
8121 extern __inline __m128i
8122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8123 _mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8124 {
8125 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8126 (__v4si) __B,
8127 (__v4si)
8128 _mm_setzero_si128 (),
8129 __M);
8130 }
8131
8132 extern __inline __m128i
8133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8134 _mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8135 __m128i __B)
8136 {
8137 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8138 (__v4si) __B,
8139 (__v4si) __W, __M);
8140 }
8141
8142 extern __inline __m128i
8143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8144 _mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8145 {
8146 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8147 (__v4si) __B,
8148 (__v4si)
8149 _mm_setzero_si128 (),
8150 __M);
8151 }
8152
8153 extern __inline __m128i
8154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155 _mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8156 __m128i __B)
8157 {
8158 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8159 (__v4si) __B,
8160 (__v4si) __W, __M);
8161 }
8162
/* If __AVX512CD__ is not defined, save the current option state and
   switch the target to avx512vl+avx512cd for the definitions below.
   __DISABLE_AVX512VLCD__ is defined alongside; NOTE(review): presumably
   it is tested later in the file to pop the options again -- confirm.  */
#if !defined (__AVX512CD__)
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif /* !__AVX512CD__ */
8168
8169 extern __inline __m128i
8170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8171 _mm_broadcastmb_epi64 (__mmask8 __A)
8172 {
8173 return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8174 }
8175
8176 extern __inline __m256i
8177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8178 _mm256_broadcastmb_epi64 (__mmask8 __A)
8179 {
8180 return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8181 }
8182
8183 extern __inline __m128i
8184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8185 _mm_broadcastmw_epi32 (__mmask16 __A)
8186 {
8187 return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8188 }
8189
8190 extern __inline __m256i
8191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8192 _mm256_broadcastmw_epi32 (__mmask16 __A)
8193 {
8194 return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8195 }
8196
8197 extern __inline __m256i
8198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8199 _mm256_lzcnt_epi32 (__m256i __A)
8200 {
8201 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8202 (__v8si)
8203 _mm256_setzero_si256 (),
8204 (__mmask8) -1);
8205 }
8206
8207 extern __inline __m256i
8208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8209 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8210 {
8211 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8212 (__v8si) __W,
8213 (__mmask8) __U);
8214 }
8215
8216 extern __inline __m256i
8217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8218 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8219 {
8220 return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8221 (__v8si)
8222 _mm256_setzero_si256 (),
8223 (__mmask8) __U);
8224 }
8225
8226 extern __inline __m256i
8227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228 _mm256_lzcnt_epi64 (__m256i __A)
8229 {
8230 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8231 (__v4di)
8232 _mm256_setzero_si256 (),
8233 (__mmask8) -1);
8234 }
8235
8236 extern __inline __m256i
8237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8238 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8239 {
8240 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8241 (__v4di) __W,
8242 (__mmask8) __U);
8243 }
8244
8245 extern __inline __m256i
8246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8247 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8248 {
8249 return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8250 (__v4di)
8251 _mm256_setzero_si256 (),
8252 (__mmask8) __U);
8253 }
8254
8255 extern __inline __m256i
8256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8257 _mm256_conflict_epi64 (__m256i __A)
8258 {
8259 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8260 (__v4di)
8261 _mm256_setzero_si256 (),
8262 (__mmask8) -
8263 1);
8264 }
8265
8266 extern __inline __m256i
8267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8268 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8269 {
8270 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8271 (__v4di) __W,
8272 (__mmask8)
8273 __U);
8274 }
8275
8276 extern __inline __m256i
8277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8278 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8279 {
8280 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8281 (__v4di)
8282 _mm256_setzero_si256 (),
8283 (__mmask8)
8284 __U);
8285 }
8286
8287 extern __inline __m256i
8288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8289 _mm256_conflict_epi32 (__m256i __A)
8290 {
8291 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8292 (__v8si)
8293 _mm256_setzero_si256 (),
8294 (__mmask8) -
8295 1);
8296 }
8297
8298 extern __inline __m256i
8299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8300 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8301 {
8302 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8303 (__v8si) __W,
8304 (__mmask8)
8305 __U);
8306 }
8307
8308 extern __inline __m256i
8309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8310 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8311 {
8312 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8313 (__v8si)
8314 _mm256_setzero_si256 (),
8315 (__mmask8)
8316 __U);
8317 }
8318
8319 extern __inline __m128i
8320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8321 _mm_lzcnt_epi32 (__m128i __A)
8322 {
8323 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8324 (__v4si)
8325 _mm_setzero_si128 (),
8326 (__mmask8) -1);
8327 }
8328
8329 extern __inline __m128i
8330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8331 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8332 {
8333 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8334 (__v4si) __W,
8335 (__mmask8) __U);
8336 }
8337
8338 extern __inline __m128i
8339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8340 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8341 {
8342 return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8343 (__v4si)
8344 _mm_setzero_si128 (),
8345 (__mmask8) __U);
8346 }
8347
8348 extern __inline __m128i
8349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8350 _mm_lzcnt_epi64 (__m128i __A)
8351 {
8352 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8353 (__v2di)
8354 _mm_setzero_di (),
8355 (__mmask8) -1);
8356 }
8357
8358 extern __inline __m128i
8359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8360 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8361 {
8362 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8363 (__v2di) __W,
8364 (__mmask8) __U);
8365 }
8366
8367 extern __inline __m128i
8368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8369 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8370 {
8371 return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8372 (__v2di)
8373 _mm_setzero_di (),
8374 (__mmask8) __U);
8375 }
8376
8377 extern __inline __m128i
8378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8379 _mm_conflict_epi64 (__m128i __A)
8380 {
8381 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8382 (__v2di)
8383 _mm_setzero_di (),
8384 (__mmask8) -
8385 1);
8386 }
8387
8388 extern __inline __m128i
8389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8390 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8391 {
8392 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8393 (__v2di) __W,
8394 (__mmask8)
8395 __U);
8396 }
8397
8398 extern __inline __m128i
8399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8400 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8401 {
8402 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8403 (__v2di)
8404 _mm_setzero_di (),
8405 (__mmask8)
8406 __U);
8407 }
8408
8409 extern __inline __m128i
8410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8411 _mm_conflict_epi32 (__m128i __A)
8412 {
8413 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8414 (__v4si)
8415 _mm_setzero_si128 (),
8416 (__mmask8) -
8417 1);
8418 }
8419
8420 extern __inline __m128i
8421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8422 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8423 {
8424 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8425 (__v4si) __W,
8426 (__mmask8)
8427 __U);
8428 }
8429
8430 extern __inline __m128i
8431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8432 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8433 {
8434 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8435 (__v4si)
8436 _mm_setzero_si128 (),
8437 (__mmask8)
8438 __U);
8439 }
8440
/* Undo the target-option push guarded by __DISABLE_AVX512VLCD__, and
   drop the marker macro so it cannot leak past this section and cause
   an unbalanced pop_options if the name is tested again.  */
#ifdef __DISABLE_AVX512VLCD__
#undef __DISABLE_AVX512VLCD__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLCD__ */
8444
8445 extern __inline __m256d
8446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8447 _mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8448 __m256d __B)
8449 {
8450 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8451 (__v4df) __B,
8452 (__v4df) __W,
8453 (__mmask8) __U);
8454 }
8455
8456 extern __inline __m256d
8457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8458 _mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8459 {
8460 return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8461 (__v4df) __B,
8462 (__v4df)
8463 _mm256_setzero_pd (),
8464 (__mmask8) __U);
8465 }
8466
8467 extern __inline __m128d
8468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469 _mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8470 __m128d __B)
8471 {
8472 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8473 (__v2df) __B,
8474 (__v2df) __W,
8475 (__mmask8) __U);
8476 }
8477
8478 extern __inline __m128d
8479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8480 _mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8481 {
8482 return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8483 (__v2df) __B,
8484 (__v2df)
8485 _mm_setzero_pd (),
8486 (__mmask8) __U);
8487 }
8488
8489 extern __inline __m256
8490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8491 _mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8492 __m256 __B)
8493 {
8494 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8495 (__v8sf) __B,
8496 (__v8sf) __W,
8497 (__mmask8) __U);
8498 }
8499
8500 extern __inline __m256d
8501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8502 _mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8503 __m256d __B)
8504 {
8505 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8506 (__v4df) __B,
8507 (__v4df) __W,
8508 (__mmask8) __U);
8509 }
8510
8511 extern __inline __m256d
8512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8513 _mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8514 {
8515 return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8516 (__v4df) __B,
8517 (__v4df)
8518 _mm256_setzero_pd (),
8519 (__mmask8) __U);
8520 }
8521
8522 extern __inline __m128d
8523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8524 _mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8525 __m128d __B)
8526 {
8527 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8528 (__v2df) __B,
8529 (__v2df) __W,
8530 (__mmask8) __U);
8531 }
8532
8533 extern __inline __m128d
8534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8535 _mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8536 {
8537 return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8538 (__v2df) __B,
8539 (__v2df)
8540 _mm_setzero_pd (),
8541 (__mmask8) __U);
8542 }
8543
8544 extern __inline __m256
8545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8546 _mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8547 __m256 __B)
8548 {
8549 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8550 (__v8sf) __B,
8551 (__v8sf) __W,
8552 (__mmask8) __U);
8553 }
8554
8555 extern __inline __m256
8556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8557 _mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8558 {
8559 return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8560 (__v8sf) __B,
8561 (__v8sf)
8562 _mm256_setzero_ps (),
8563 (__mmask8) __U);
8564 }
8565
8566 extern __inline __m128
8567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8568 _mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8569 {
8570 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8571 (__v4sf) __B,
8572 (__v4sf) __W,
8573 (__mmask8) __U);
8574 }
8575
8576 extern __inline __m128
8577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8578 _mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8579 {
8580 return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8581 (__v4sf) __B,
8582 (__v4sf)
8583 _mm_setzero_ps (),
8584 (__mmask8) __U);
8585 }
8586
8587 extern __inline __m128
8588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8589 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8590 {
8591 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8592 (__v4sf) __W,
8593 (__mmask8) __U);
8594 }
8595
8596 extern __inline __m128
8597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8598 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8599 {
8600 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8601 (__v4sf)
8602 _mm_setzero_ps (),
8603 (__mmask8) __U);
8604 }
8605
8606 extern __inline __m256
8607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608 _mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8609 {
8610 return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8611 (__v8sf) __B,
8612 (__v8sf)
8613 _mm256_setzero_ps (),
8614 (__mmask8) __U);
8615 }
8616
8617 extern __inline __m256
8618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8619 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8620 {
8621 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8622 (__v8sf) __W,
8623 (__mmask8) __U);
8624 }
8625
8626 extern __inline __m256
8627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8628 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8629 {
8630 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8631 (__v8sf)
8632 _mm256_setzero_ps (),
8633 (__mmask8) __U);
8634 }
8635
8636 extern __inline __m128
8637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8638 _mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8639 {
8640 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8641 (__v4sf) __B,
8642 (__v4sf) __W,
8643 (__mmask8) __U);
8644 }
8645
8646 extern __inline __m128
8647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8648 _mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8649 {
8650 return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8651 (__v4sf) __B,
8652 (__v4sf)
8653 _mm_setzero_ps (),
8654 (__mmask8) __U);
8655 }
8656
8657 extern __inline __m256i
8658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659 _mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8660 __m128i __B)
8661 {
8662 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8663 (__v4si) __B,
8664 (__v8si) __W,
8665 (__mmask8) __U);
8666 }
8667
8668 extern __inline __m256i
8669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670 _mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8671 {
8672 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8673 (__v4si) __B,
8674 (__v8si)
8675 _mm256_setzero_si256 (),
8676 (__mmask8) __U);
8677 }
8678
8679 extern __inline __m128i
8680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681 _mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8682 __m128i __B)
8683 {
8684 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8685 (__v4si) __B,
8686 (__v4si) __W,
8687 (__mmask8) __U);
8688 }
8689
8690 extern __inline __m128i
8691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8692 _mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8693 {
8694 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8695 (__v4si) __B,
8696 (__v4si)
8697 _mm_setzero_si128 (),
8698 (__mmask8) __U);
8699 }
8700
8701 extern __inline __m256i
8702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703 _mm256_sra_epi64 (__m256i __A, __m128i __B)
8704 {
8705 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8706 (__v2di) __B,
8707 (__v4di)
8708 _mm256_setzero_si256 (),
8709 (__mmask8) -1);
8710 }
8711
8712 extern __inline __m256i
8713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8714 _mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8715 __m128i __B)
8716 {
8717 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8718 (__v2di) __B,
8719 (__v4di) __W,
8720 (__mmask8) __U);
8721 }
8722
8723 extern __inline __m256i
8724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8725 _mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8726 {
8727 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8728 (__v2di) __B,
8729 (__v4di)
8730 _mm256_setzero_si256 (),
8731 (__mmask8) __U);
8732 }
8733
8734 extern __inline __m128i
8735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8736 _mm_sra_epi64 (__m128i __A, __m128i __B)
8737 {
8738 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8739 (__v2di) __B,
8740 (__v2di)
8741 _mm_setzero_di (),
8742 (__mmask8) -1);
8743 }
8744
8745 extern __inline __m128i
8746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8747 _mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8748 __m128i __B)
8749 {
8750 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8751 (__v2di) __B,
8752 (__v2di) __W,
8753 (__mmask8) __U);
8754 }
8755
8756 extern __inline __m128i
8757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758 _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8759 {
8760 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8761 (__v2di) __B,
8762 (__v2di)
8763 _mm_setzero_di (),
8764 (__mmask8) __U);
8765 }
8766
8767 extern __inline __m128i
8768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8769 _mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8770 __m128i __B)
8771 {
8772 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8773 (__v4si) __B,
8774 (__v4si) __W,
8775 (__mmask8) __U);
8776 }
8777
8778 extern __inline __m128i
8779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8780 _mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8781 {
8782 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8783 (__v4si) __B,
8784 (__v4si)
8785 _mm_setzero_si128 (),
8786 (__mmask8) __U);
8787 }
8788
8789 extern __inline __m128i
8790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8791 _mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8792 __m128i __B)
8793 {
8794 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8795 (__v2di) __B,
8796 (__v2di) __W,
8797 (__mmask8) __U);
8798 }
8799
8800 extern __inline __m128i
8801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802 _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8803 {
8804 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8805 (__v2di) __B,
8806 (__v2di)
8807 _mm_setzero_di (),
8808 (__mmask8) __U);
8809 }
8810
8811 extern __inline __m256i
8812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8813 _mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8814 __m128i __B)
8815 {
8816 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8817 (__v4si) __B,
8818 (__v8si) __W,
8819 (__mmask8) __U);
8820 }
8821
8822 extern __inline __m256i
8823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8824 _mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8825 {
8826 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8827 (__v4si) __B,
8828 (__v8si)
8829 _mm256_setzero_si256 (),
8830 (__mmask8) __U);
8831 }
8832
8833 extern __inline __m256i
8834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8835 _mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8836 __m128i __B)
8837 {
8838 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8839 (__v2di) __B,
8840 (__v4di) __W,
8841 (__mmask8) __U);
8842 }
8843
8844 extern __inline __m256i
8845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846 _mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8847 {
8848 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8849 (__v2di) __B,
8850 (__v4di)
8851 _mm256_setzero_si256 (),
8852 (__mmask8) __U);
8853 }
8854
8855 extern __inline __m256
8856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8857 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8858 __m256 __Y)
8859 {
8860 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8861 (__v8si) __X,
8862 (__v8sf) __W,
8863 (__mmask8) __U);
8864 }
8865
8866 extern __inline __m256
8867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8868 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8869 {
8870 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8871 (__v8si) __X,
8872 (__v8sf)
8873 _mm256_setzero_ps (),
8874 (__mmask8) __U);
8875 }
8876
8877 extern __inline __m256d
8878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8879 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8880 {
8881 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8882 (__v4di) __X,
8883 (__v4df)
8884 _mm256_setzero_pd (),
8885 (__mmask8) -1);
8886 }
8887
8888 extern __inline __m256d
8889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8890 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8891 __m256d __Y)
8892 {
8893 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8894 (__v4di) __X,
8895 (__v4df) __W,
8896 (__mmask8) __U);
8897 }
8898
8899 extern __inline __m256d
8900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8902 {
8903 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8904 (__v4di) __X,
8905 (__v4df)
8906 _mm256_setzero_pd (),
8907 (__mmask8) __U);
8908 }
8909
8910 extern __inline __m256d
8911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8912 _mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8913 __m256i __C)
8914 {
8915 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8916 (__v4di) __C,
8917 (__v4df) __W,
8918 (__mmask8)
8919 __U);
8920 }
8921
8922 extern __inline __m256d
8923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8924 _mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8925 {
8926 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8927 (__v4di) __C,
8928 (__v4df)
8929 _mm256_setzero_pd (),
8930 (__mmask8)
8931 __U);
8932 }
8933
8934 extern __inline __m256
8935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8936 _mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8937 __m256i __C)
8938 {
8939 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8940 (__v8si) __C,
8941 (__v8sf) __W,
8942 (__mmask8) __U);
8943 }
8944
8945 extern __inline __m256
8946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8947 _mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
8948 {
8949 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8950 (__v8si) __C,
8951 (__v8sf)
8952 _mm256_setzero_ps (),
8953 (__mmask8) __U);
8954 }
8955
8956 extern __inline __m128d
8957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8958 _mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
8959 __m128i __C)
8960 {
8961 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8962 (__v2di) __C,
8963 (__v2df) __W,
8964 (__mmask8) __U);
8965 }
8966
8967 extern __inline __m128d
8968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8969 _mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
8970 {
8971 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
8972 (__v2di) __C,
8973 (__v2df)
8974 _mm_setzero_pd (),
8975 (__mmask8) __U);
8976 }
8977
8978 extern __inline __m128
8979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8980 _mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
8981 __m128i __C)
8982 {
8983 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8984 (__v4si) __C,
8985 (__v4sf) __W,
8986 (__mmask8) __U);
8987 }
8988
8989 extern __inline __m128
8990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8991 _mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
8992 {
8993 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
8994 (__v4si) __C,
8995 (__v4sf)
8996 _mm_setzero_ps (),
8997 (__mmask8) __U);
8998 }
8999
9000 extern __inline __m256i
9001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9002 _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9003 {
9004 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9005 (__v8si) __B,
9006 (__v8si)
9007 _mm256_setzero_si256 (),
9008 __M);
9009 }
9010
9011 extern __inline __m256i
9012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9013 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9014 {
9015 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9016 (__v4di) __X,
9017 (__v4di)
9018 _mm256_setzero_si256 (),
9019 __M);
9020 }
9021
9022 extern __inline __m256i
9023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9024 _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9025 __m256i __B)
9026 {
9027 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9028 (__v8si) __B,
9029 (__v8si) __W, __M);
9030 }
9031
9032 extern __inline __m128i
9033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9034 _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9035 {
9036 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9037 (__v4si) __B,
9038 (__v4si)
9039 _mm_setzero_si128 (),
9040 __M);
9041 }
9042
9043 extern __inline __m128i
9044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9045 _mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
9046 __m128i __B)
9047 {
9048 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9049 (__v4si) __B,
9050 (__v4si) __W, __M);
9051 }
9052
9053 extern __inline __m256i
9054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055 _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9056 __m256i __Y)
9057 {
9058 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9059 (__v8si) __Y,
9060 (__v4di) __W, __M);
9061 }
9062
9063 extern __inline __m256i
9064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9065 _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9066 {
9067 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9068 (__v8si) __Y,
9069 (__v4di)
9070 _mm256_setzero_si256 (),
9071 __M);
9072 }
9073
9074 extern __inline __m128i
9075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9076 _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9077 __m128i __Y)
9078 {
9079 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9080 (__v4si) __Y,
9081 (__v2di) __W, __M);
9082 }
9083
9084 extern __inline __m128i
9085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9086 _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9087 {
9088 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9089 (__v4si) __Y,
9090 (__v2di)
9091 _mm_setzero_si128 (),
9092 __M);
9093 }
9094
9095 extern __inline __m256i
9096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9097 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9098 __m256i __Y)
9099 {
9100 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9101 (__v4di) __X,
9102 (__v4di) __W,
9103 __M);
9104 }
9105
9106 extern __inline __m256i
9107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108 _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9109 __m256i __Y)
9110 {
9111 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9112 (__v8si) __Y,
9113 (__v4di) __W, __M);
9114 }
9115
9116 extern __inline __m256i
9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9119 {
9120 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9121 (__v8si) __X,
9122 (__v8si)
9123 _mm256_setzero_si256 (),
9124 __M);
9125 }
9126
9127 extern __inline __m256i
9128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129 _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9130 {
9131 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9132 (__v8si) __Y,
9133 (__v4di)
9134 _mm256_setzero_si256 (),
9135 __M);
9136 }
9137
9138 extern __inline __m128i
9139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9140 _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9141 __m128i __Y)
9142 {
9143 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9144 (__v4si) __Y,
9145 (__v2di) __W, __M);
9146 }
9147
9148 extern __inline __m128i
9149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9150 _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9151 {
9152 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9153 (__v4si) __Y,
9154 (__v2di)
9155 _mm_setzero_si128 (),
9156 __M);
9157 }
9158
9159 extern __inline __m256i
9160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9162 __m256i __Y)
9163 {
9164 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9165 (__v8si) __X,
9166 (__v8si) __W,
9167 __M);
9168 }
9169
9170 #ifdef __OPTIMIZE__
9171 extern __inline __m256i
9172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9173 _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9174 __m256i __X, const int __I)
9175 {
9176 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9177 __I,
9178 (__v4di) __W,
9179 (__mmask8) __M);
9180 }
9181
9182 extern __inline __m256i
9183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9184 _mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9185 {
9186 return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9187 __I,
9188 (__v4di)
9189 _mm256_setzero_si256 (),
9190 (__mmask8) __M);
9191 }
9192
9193 extern __inline __m256d
9194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9195 _mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9196 __m256d __B, const int __imm)
9197 {
9198 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9199 (__v4df) __B, __imm,
9200 (__v4df) __W,
9201 (__mmask8) __U);
9202 }
9203
9204 extern __inline __m256d
9205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9206 _mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9207 const int __imm)
9208 {
9209 return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9210 (__v4df) __B, __imm,
9211 (__v4df)
9212 _mm256_setzero_pd (),
9213 (__mmask8) __U);
9214 }
9215
9216 extern __inline __m128d
9217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9218 _mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9219 __m128d __B, const int __imm)
9220 {
9221 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9222 (__v2df) __B, __imm,
9223 (__v2df) __W,
9224 (__mmask8) __U);
9225 }
9226
9227 extern __inline __m128d
9228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9229 _mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9230 const int __imm)
9231 {
9232 return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9233 (__v2df) __B, __imm,
9234 (__v2df)
9235 _mm_setzero_pd (),
9236 (__mmask8) __U);
9237 }
9238
9239 extern __inline __m256
9240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9241 _mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9242 __m256 __B, const int __imm)
9243 {
9244 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9245 (__v8sf) __B, __imm,
9246 (__v8sf) __W,
9247 (__mmask8) __U);
9248 }
9249
9250 extern __inline __m256
9251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9252 _mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9253 const int __imm)
9254 {
9255 return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9256 (__v8sf) __B, __imm,
9257 (__v8sf)
9258 _mm256_setzero_ps (),
9259 (__mmask8) __U);
9260 }
9261
9262 extern __inline __m128
9263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9264 _mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9265 const int __imm)
9266 {
9267 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9268 (__v4sf) __B, __imm,
9269 (__v4sf) __W,
9270 (__mmask8) __U);
9271 }
9272
9273 extern __inline __m128
9274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9275 _mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9276 const int __imm)
9277 {
9278 return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9279 (__v4sf) __B, __imm,
9280 (__v4sf)
9281 _mm_setzero_ps (),
9282 (__mmask8) __U);
9283 }
9284
9285 extern __inline __m256i
9286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9287 _mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9288 {
9289 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9290 (__v4si) __B,
9291 __imm,
9292 (__v8si)
9293 _mm256_setzero_si256 (),
9294 (__mmask8) -
9295 1);
9296 }
9297
9298 extern __inline __m256i
9299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9300 _mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9301 __m128i __B, const int __imm)
9302 {
9303 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9304 (__v4si) __B,
9305 __imm,
9306 (__v8si) __W,
9307 (__mmask8)
9308 __U);
9309 }
9310
9311 extern __inline __m256i
9312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9313 _mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9314 const int __imm)
9315 {
9316 return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9317 (__v4si) __B,
9318 __imm,
9319 (__v8si)
9320 _mm256_setzero_si256 (),
9321 (__mmask8)
9322 __U);
9323 }
9324
9325 extern __inline __m256
9326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9327 _mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
9328 {
9329 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9330 (__v4sf) __B,
9331 __imm,
9332 (__v8sf)
9333 _mm256_setzero_ps (),
9334 (__mmask8) -1);
9335 }
9336
9337 extern __inline __m256
9338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9339 _mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9340 __m128 __B, const int __imm)
9341 {
9342 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9343 (__v4sf) __B,
9344 __imm,
9345 (__v8sf) __W,
9346 (__mmask8) __U);
9347 }
9348
9349 extern __inline __m256
9350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9351 _mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
9352 const int __imm)
9353 {
9354 return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9355 (__v4sf) __B,
9356 __imm,
9357 (__v8sf)
9358 _mm256_setzero_ps (),
9359 (__mmask8) __U);
9360 }
9361
9362 extern __inline __m128i
9363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9364 _mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
9365 {
9366 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9367 __imm,
9368 (__v4si)
9369 _mm_setzero_si128 (),
9370 (__mmask8) -
9371 1);
9372 }
9373
9374 extern __inline __m128i
9375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9376 _mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
9377 const int __imm)
9378 {
9379 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9380 __imm,
9381 (__v4si) __W,
9382 (__mmask8)
9383 __U);
9384 }
9385
9386 extern __inline __m128i
9387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9388 _mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
9389 const int __imm)
9390 {
9391 return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
9392 __imm,
9393 (__v4si)
9394 _mm_setzero_si128 (),
9395 (__mmask8)
9396 __U);
9397 }
9398
9399 extern __inline __m128
9400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9401 _mm256_extractf32x4_ps (__m256 __A, const int __imm)
9402 {
9403 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9404 __imm,
9405 (__v4sf)
9406 _mm_setzero_ps (),
9407 (__mmask8) -
9408 1);
9409 }
9410
9411 extern __inline __m128
9412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413 _mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
9414 const int __imm)
9415 {
9416 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9417 __imm,
9418 (__v4sf) __W,
9419 (__mmask8)
9420 __U);
9421 }
9422
9423 extern __inline __m128
9424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9425 _mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
9426 const int __imm)
9427 {
9428 return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
9429 __imm,
9430 (__v4sf)
9431 _mm_setzero_ps (),
9432 (__mmask8)
9433 __U);
9434 }
9435
9436 extern __inline __m256i
9437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9438 _mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
9439 {
9440 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9441 (__v4di) __B,
9442 __imm,
9443 (__v4di)
9444 _mm256_setzero_si256 (),
9445 (__mmask8) -1);
9446 }
9447
9448 extern __inline __m256i
9449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9450 _mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
9451 __m256i __B, const int __imm)
9452 {
9453 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9454 (__v4di) __B,
9455 __imm,
9456 (__v4di) __W,
9457 (__mmask8) __U);
9458 }
9459
9460 extern __inline __m256i
9461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9462 _mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
9463 const int __imm)
9464 {
9465 return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
9466 (__v4di) __B,
9467 __imm,
9468 (__v4di)
9469 _mm256_setzero_si256 (),
9470 (__mmask8) __U);
9471 }
9472
9473 extern __inline __m256i
9474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475 _mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
9476 {
9477 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9478 (__v8si) __B,
9479 __imm,
9480 (__v8si)
9481 _mm256_setzero_si256 (),
9482 (__mmask8) -1);
9483 }
9484
9485 extern __inline __m256i
9486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9487 _mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9488 __m256i __B, const int __imm)
9489 {
9490 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9491 (__v8si) __B,
9492 __imm,
9493 (__v8si) __W,
9494 (__mmask8) __U);
9495 }
9496
9497 extern __inline __m256i
9498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9499 _mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
9500 const int __imm)
9501 {
9502 return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
9503 (__v8si) __B,
9504 __imm,
9505 (__v8si)
9506 _mm256_setzero_si256 (),
9507 (__mmask8) __U);
9508 }
9509
9510 extern __inline __m256d
9511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9512 _mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
9513 {
9514 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9515 (__v4df) __B,
9516 __imm,
9517 (__v4df)
9518 _mm256_setzero_pd (),
9519 (__mmask8) -1);
9520 }
9521
9522 extern __inline __m256d
9523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9524 _mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
9525 __m256d __B, const int __imm)
9526 {
9527 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9528 (__v4df) __B,
9529 __imm,
9530 (__v4df) __W,
9531 (__mmask8) __U);
9532 }
9533
9534 extern __inline __m256d
9535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9536 _mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
9537 const int __imm)
9538 {
9539 return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
9540 (__v4df) __B,
9541 __imm,
9542 (__v4df)
9543 _mm256_setzero_pd (),
9544 (__mmask8) __U);
9545 }
9546
9547 extern __inline __m256
9548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9549 _mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
9550 {
9551 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9552 (__v8sf) __B,
9553 __imm,
9554 (__v8sf)
9555 _mm256_setzero_ps (),
9556 (__mmask8) -1);
9557 }
9558
9559 extern __inline __m256
9560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9561 _mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
9562 __m256 __B, const int __imm)
9563 {
9564 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9565 (__v8sf) __B,
9566 __imm,
9567 (__v8sf) __W,
9568 (__mmask8) __U);
9569 }
9570
9571 extern __inline __m256
9572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9573 _mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
9574 const int __imm)
9575 {
9576 return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
9577 (__v8sf) __B,
9578 __imm,
9579 (__v8sf)
9580 _mm256_setzero_ps (),
9581 (__mmask8) __U);
9582 }
9583
9584 extern __inline __m256d
9585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9586 _mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
9587 const int __imm)
9588 {
9589 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9590 (__v4df) __B,
9591 (__v4di) __C,
9592 __imm,
9593 (__mmask8) -1);
9594 }
9595
9596 extern __inline __m256d
9597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598 _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
9599 __m256i __C, const int __imm)
9600 {
9601 return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
9602 (__v4df) __B,
9603 (__v4di) __C,
9604 __imm,
9605 (__mmask8) __U);
9606 }
9607
9608 extern __inline __m256d
9609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9610 _mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
9611 __m256i __C, const int __imm)
9612 {
9613 return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
9614 (__v4df) __B,
9615 (__v4di) __C,
9616 __imm,
9617 (__mmask8) __U);
9618 }
9619
9620 extern __inline __m256
9621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9622 _mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
9623 const int __imm)
9624 {
9625 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9626 (__v8sf) __B,
9627 (__v8si) __C,
9628 __imm,
9629 (__mmask8) -1);
9630 }
9631
9632 extern __inline __m256
9633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9634 _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
9635 __m256i __C, const int __imm)
9636 {
9637 return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
9638 (__v8sf) __B,
9639 (__v8si) __C,
9640 __imm,
9641 (__mmask8) __U);
9642 }
9643
9644 extern __inline __m256
9645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9646 _mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
9647 __m256i __C, const int __imm)
9648 {
9649 return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
9650 (__v8sf) __B,
9651 (__v8si) __C,
9652 __imm,
9653 (__mmask8) __U);
9654 }
9655
9656 extern __inline __m128d
9657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9658 _mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
9659 const int __imm)
9660 {
9661 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9662 (__v2df) __B,
9663 (__v2di) __C,
9664 __imm,
9665 (__mmask8) -1);
9666 }
9667
9668 extern __inline __m128d
9669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9670 _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
9671 __m128i __C, const int __imm)
9672 {
9673 return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
9674 (__v2df) __B,
9675 (__v2di) __C,
9676 __imm,
9677 (__mmask8) __U);
9678 }
9679
9680 extern __inline __m128d
9681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9682 _mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
9683 __m128i __C, const int __imm)
9684 {
9685 return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
9686 (__v2df) __B,
9687 (__v2di) __C,
9688 __imm,
9689 (__mmask8) __U);
9690 }
9691
9692 extern __inline __m128
9693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9694 _mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
9695 {
9696 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9697 (__v4sf) __B,
9698 (__v4si) __C,
9699 __imm,
9700 (__mmask8) -1);
9701 }
9702
9703 extern __inline __m128
9704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9705 _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
9706 __m128i __C, const int __imm)
9707 {
9708 return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
9709 (__v4sf) __B,
9710 (__v4si) __C,
9711 __imm,
9712 (__mmask8) __U);
9713 }
9714
9715 extern __inline __m128
9716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9717 _mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
9718 __m128i __C, const int __imm)
9719 {
9720 return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
9721 (__v4sf) __B,
9722 (__v4si) __C,
9723 __imm,
9724 (__mmask8) __U);
9725 }
9726
9727 extern __inline __m256i
9728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9729 _mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
9730 const int __imm)
9731 {
9732 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9733 (__v8si) __W,
9734 (__mmask8) __U);
9735 }
9736
9737 extern __inline __m256i
9738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9739 _mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
9740 {
9741 return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
9742 (__v8si)
9743 _mm256_setzero_si256 (),
9744 (__mmask8) __U);
9745 }
9746
9747 extern __inline __m128i
9748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9749 _mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
9750 const int __imm)
9751 {
9752 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9753 (__v4si) __W,
9754 (__mmask8) __U);
9755 }
9756
9757 extern __inline __m128i
9758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9759 _mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
9760 {
9761 return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
9762 (__v4si)
9763 _mm_setzero_si128 (),
9764 (__mmask8) __U);
9765 }
9766
9767 extern __inline __m256i
9768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769 _mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
9770 const int __imm)
9771 {
9772 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9773 (__v4di) __W,
9774 (__mmask8) __U);
9775 }
9776
9777 extern __inline __m256i
9778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9779 _mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
9780 {
9781 return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
9782 (__v4di)
9783 _mm256_setzero_si256 (),
9784 (__mmask8) __U);
9785 }
9786
9787 extern __inline __m128i
9788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9789 _mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
9790 const int __imm)
9791 {
9792 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9793 (__v2di) __W,
9794 (__mmask8) __U);
9795 }
9796
9797 extern __inline __m128i
9798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9799 _mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
9800 {
9801 return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
9802 (__v2di)
9803 _mm_setzero_si128 (),
9804 (__mmask8) __U);
9805 }
9806
9807 extern __inline __m256i
9808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9809 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
9810 const int imm)
9811 {
9812 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9813 (__v4di) __B,
9814 (__v4di) __C, imm,
9815 (__mmask8) -1);
9816 }
9817
9818 extern __inline __m256i
9819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9820 _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
9821 __m256i __B, __m256i __C,
9822 const int imm)
9823 {
9824 return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
9825 (__v4di) __B,
9826 (__v4di) __C, imm,
9827 (__mmask8) __U);
9828 }
9829
9830 extern __inline __m256i
9831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9832 _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
9833 __m256i __B, __m256i __C,
9834 const int imm)
9835 {
9836 return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
9837 (__v4di) __B,
9838 (__v4di) __C,
9839 imm,
9840 (__mmask8) __U);
9841 }
9842
9843 extern __inline __m256i
9844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9845 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
9846 const int imm)
9847 {
9848 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9849 (__v8si) __B,
9850 (__v8si) __C, imm,
9851 (__mmask8) -1);
9852 }
9853
9854 extern __inline __m256i
9855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9856 _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
9857 __m256i __B, __m256i __C,
9858 const int imm)
9859 {
9860 return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
9861 (__v8si) __B,
9862 (__v8si) __C, imm,
9863 (__mmask8) __U);
9864 }
9865
9866 extern __inline __m256i
9867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9868 _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
9869 __m256i __B, __m256i __C,
9870 const int imm)
9871 {
9872 return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
9873 (__v8si) __B,
9874 (__v8si) __C,
9875 imm,
9876 (__mmask8) __U);
9877 }
9878
9879 extern __inline __m128i
9880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9881 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
9882 const int imm)
9883 {
9884 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9885 (__v2di) __B,
9886 (__v2di) __C, imm,
9887 (__mmask8) -1);
9888 }
9889
9890 extern __inline __m128i
9891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9892 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
9893 __m128i __B, __m128i __C, const int imm)
9894 {
9895 return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
9896 (__v2di) __B,
9897 (__v2di) __C, imm,
9898 (__mmask8) __U);
9899 }
9900
9901 extern __inline __m128i
9902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9903 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
9904 __m128i __B, __m128i __C, const int imm)
9905 {
9906 return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
9907 (__v2di) __B,
9908 (__v2di) __C,
9909 imm,
9910 (__mmask8) __U);
9911 }
9912
9913 extern __inline __m128i
9914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9915 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
9916 const int imm)
9917 {
9918 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9919 (__v4si) __B,
9920 (__v4si) __C, imm,
9921 (__mmask8) -1);
9922 }
9923
9924 extern __inline __m128i
9925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
9927 __m128i __B, __m128i __C, const int imm)
9928 {
9929 return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
9930 (__v4si) __B,
9931 (__v4si) __C, imm,
9932 (__mmask8) __U);
9933 }
9934
9935 extern __inline __m128i
9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
9938 __m128i __B, __m128i __C, const int imm)
9939 {
9940 return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
9941 (__v4si) __B,
9942 (__v4si) __C,
9943 imm,
9944 (__mmask8) __U);
9945 }
9946
9947 extern __inline __m256
9948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9949 _mm256_roundscale_ps (__m256 __A, const int __imm)
9950 {
9951 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9952 __imm,
9953 (__v8sf)
9954 _mm256_setzero_ps (),
9955 (__mmask8) -1);
9956 }
9957
9958 extern __inline __m256
9959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960 _mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
9961 const int __imm)
9962 {
9963 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9964 __imm,
9965 (__v8sf) __W,
9966 (__mmask8) __U);
9967 }
9968
9969 extern __inline __m256
9970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9971 _mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
9972 {
9973 return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
9974 __imm,
9975 (__v8sf)
9976 _mm256_setzero_ps (),
9977 (__mmask8) __U);
9978 }
9979
9980 extern __inline __m256d
9981 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9982 _mm256_roundscale_pd (__m256d __A, const int __imm)
9983 {
9984 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9985 __imm,
9986 (__v4df)
9987 _mm256_setzero_pd (),
9988 (__mmask8) -1);
9989 }
9990
9991 extern __inline __m256d
9992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993 _mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
9994 const int __imm)
9995 {
9996 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
9997 __imm,
9998 (__v4df) __W,
9999 (__mmask8) __U);
10000 }
10001
10002 extern __inline __m256d
10003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10004 _mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
10005 {
10006 return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10007 __imm,
10008 (__v4df)
10009 _mm256_setzero_pd (),
10010 (__mmask8) __U);
10011 }
10012
10013 extern __inline __m128
10014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10015 _mm_roundscale_ps (__m128 __A, const int __imm)
10016 {
10017 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10018 __imm,
10019 (__v4sf)
10020 _mm_setzero_ps (),
10021 (__mmask8) -1);
10022 }
10023
10024 extern __inline __m128
10025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10026 _mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10027 const int __imm)
10028 {
10029 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10030 __imm,
10031 (__v4sf) __W,
10032 (__mmask8) __U);
10033 }
10034
10035 extern __inline __m128
10036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037 _mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10038 {
10039 return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10040 __imm,
10041 (__v4sf)
10042 _mm_setzero_ps (),
10043 (__mmask8) __U);
10044 }
10045
10046 extern __inline __m128d
10047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048 _mm_roundscale_pd (__m128d __A, const int __imm)
10049 {
10050 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10051 __imm,
10052 (__v2df)
10053 _mm_setzero_pd (),
10054 (__mmask8) -1);
10055 }
10056
10057 extern __inline __m128d
10058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10059 _mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10060 const int __imm)
10061 {
10062 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10063 __imm,
10064 (__v2df) __W,
10065 (__mmask8) __U);
10066 }
10067
10068 extern __inline __m128d
10069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10070 _mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10071 {
10072 return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10073 __imm,
10074 (__v2df)
10075 _mm_setzero_pd (),
10076 (__mmask8) __U);
10077 }
10078
10079 extern __inline __m256
10080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10081 _mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10082 _MM_MANTISSA_SIGN_ENUM __C)
10083 {
10084 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10085 (__C << 2) | __B,
10086 (__v8sf)
10087 _mm256_setzero_ps (),
10088 (__mmask8) -1);
10089 }
10090
10091 extern __inline __m256
10092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10093 _mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10094 _MM_MANTISSA_NORM_ENUM __B,
10095 _MM_MANTISSA_SIGN_ENUM __C)
10096 {
10097 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10098 (__C << 2) | __B,
10099 (__v8sf) __W,
10100 (__mmask8) __U);
10101 }
10102
10103 extern __inline __m256
10104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10105 _mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10106 _MM_MANTISSA_NORM_ENUM __B,
10107 _MM_MANTISSA_SIGN_ENUM __C)
10108 {
10109 return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10110 (__C << 2) | __B,
10111 (__v8sf)
10112 _mm256_setzero_ps (),
10113 (__mmask8) __U);
10114 }
10115
10116 extern __inline __m128
10117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10118 _mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10119 _MM_MANTISSA_SIGN_ENUM __C)
10120 {
10121 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10122 (__C << 2) | __B,
10123 (__v4sf)
10124 _mm_setzero_ps (),
10125 (__mmask8) -1);
10126 }
10127
10128 extern __inline __m128
10129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10130 _mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10131 _MM_MANTISSA_NORM_ENUM __B,
10132 _MM_MANTISSA_SIGN_ENUM __C)
10133 {
10134 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10135 (__C << 2) | __B,
10136 (__v4sf) __W,
10137 (__mmask8) __U);
10138 }
10139
10140 extern __inline __m128
10141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10142 _mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10143 _MM_MANTISSA_NORM_ENUM __B,
10144 _MM_MANTISSA_SIGN_ENUM __C)
10145 {
10146 return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10147 (__C << 2) | __B,
10148 (__v4sf)
10149 _mm_setzero_ps (),
10150 (__mmask8) __U);
10151 }
10152
10153 extern __inline __m256d
10154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10155 _mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10156 _MM_MANTISSA_SIGN_ENUM __C)
10157 {
10158 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10159 (__C << 2) | __B,
10160 (__v4df)
10161 _mm256_setzero_pd (),
10162 (__mmask8) -1);
10163 }
10164
10165 extern __inline __m256d
10166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10167 _mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10168 _MM_MANTISSA_NORM_ENUM __B,
10169 _MM_MANTISSA_SIGN_ENUM __C)
10170 {
10171 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10172 (__C << 2) | __B,
10173 (__v4df) __W,
10174 (__mmask8) __U);
10175 }
10176
10177 extern __inline __m256d
10178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10179 _mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10180 _MM_MANTISSA_NORM_ENUM __B,
10181 _MM_MANTISSA_SIGN_ENUM __C)
10182 {
10183 return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10184 (__C << 2) | __B,
10185 (__v4df)
10186 _mm256_setzero_pd (),
10187 (__mmask8) __U);
10188 }
10189
10190 extern __inline __m128d
10191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10192 _mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10193 _MM_MANTISSA_SIGN_ENUM __C)
10194 {
10195 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10196 (__C << 2) | __B,
10197 (__v2df)
10198 _mm_setzero_pd (),
10199 (__mmask8) -1);
10200 }
10201
10202 extern __inline __m128d
10203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10204 _mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10205 _MM_MANTISSA_NORM_ENUM __B,
10206 _MM_MANTISSA_SIGN_ENUM __C)
10207 {
10208 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10209 (__C << 2) | __B,
10210 (__v2df) __W,
10211 (__mmask8) __U);
10212 }
10213
10214 extern __inline __m128d
10215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10216 _mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10217 _MM_MANTISSA_NORM_ENUM __B,
10218 _MM_MANTISSA_SIGN_ENUM __C)
10219 {
10220 return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10221 (__C << 2) | __B,
10222 (__v2df)
10223 _mm_setzero_pd (),
10224 (__mmask8) __U);
10225 }
10226
10227 extern __inline __m256
10228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10229 _mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10230 __m256i __index, float const *__addr,
10231 int __scale)
10232 {
10233 return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10234 __addr,
10235 (__v8si) __index,
10236 __mask, __scale);
10237 }
10238
10239 extern __inline __m128
10240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10241 _mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10242 __m128i __index, float const *__addr,
10243 int __scale)
10244 {
10245 return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10246 __addr,
10247 (__v4si) __index,
10248 __mask, __scale);
10249 }
10250
10251 extern __inline __m256d
10252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10253 _mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10254 __m128i __index, double const *__addr,
10255 int __scale)
10256 {
10257 return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10258 __addr,
10259 (__v4si) __index,
10260 __mask, __scale);
10261 }
10262
10263 extern __inline __m128d
10264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10265 _mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10266 __m128i __index, double const *__addr,
10267 int __scale)
10268 {
10269 return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10270 __addr,
10271 (__v4si) __index,
10272 __mask, __scale);
10273 }
10274
10275 extern __inline __m128
10276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10277 _mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10278 __m256i __index, float const *__addr,
10279 int __scale)
10280 {
10281 return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10282 __addr,
10283 (__v4di) __index,
10284 __mask, __scale);
10285 }
10286
10287 extern __inline __m128
10288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10289 _mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10290 __m128i __index, float const *__addr,
10291 int __scale)
10292 {
10293 return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10294 __addr,
10295 (__v2di) __index,
10296 __mask, __scale);
10297 }
10298
10299 extern __inline __m256d
10300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10301 _mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10302 __m256i __index, double const *__addr,
10303 int __scale)
10304 {
10305 return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10306 __addr,
10307 (__v4di) __index,
10308 __mask, __scale);
10309 }
10310
10311 extern __inline __m128d
10312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10313 _mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10314 __m128i __index, double const *__addr,
10315 int __scale)
10316 {
10317 return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10318 __addr,
10319 (__v2di) __index,
10320 __mask, __scale);
10321 }
10322
10323 extern __inline __m256i
10324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10325 _mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10326 __m256i __index, int const *__addr,
10327 int __scale)
10328 {
10329 return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10330 __addr,
10331 (__v8si) __index,
10332 __mask, __scale);
10333 }
10334
10335 extern __inline __m128i
10336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10337 _mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10338 __m128i __index, int const *__addr,
10339 int __scale)
10340 {
10341 return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
10342 __addr,
10343 (__v4si) __index,
10344 __mask, __scale);
10345 }
10346
10347 extern __inline __m256i
10348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10349 _mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10350 __m128i __index, long long const *__addr,
10351 int __scale)
10352 {
10353 return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
10354 __addr,
10355 (__v4si) __index,
10356 __mask, __scale);
10357 }
10358
10359 extern __inline __m128i
10360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10361 _mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10362 __m128i __index, long long const *__addr,
10363 int __scale)
10364 {
10365 return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
10366 __addr,
10367 (__v4si) __index,
10368 __mask, __scale);
10369 }
10370
10371 extern __inline __m128i
10372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10373 _mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10374 __m256i __index, int const *__addr,
10375 int __scale)
10376 {
10377 return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
10378 __addr,
10379 (__v4di) __index,
10380 __mask, __scale);
10381 }
10382
10383 extern __inline __m128i
10384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10385 _mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10386 __m128i __index, int const *__addr,
10387 int __scale)
10388 {
10389 return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
10390 __addr,
10391 (__v2di) __index,
10392 __mask, __scale);
10393 }
10394
10395 extern __inline __m256i
10396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10397 _mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
10398 __m256i __index, long long const *__addr,
10399 int __scale)
10400 {
10401 return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
10402 __addr,
10403 (__v4di) __index,
10404 __mask, __scale);
10405 }
10406
10407 extern __inline __m128i
10408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10409 _mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
10410 __m128i __index, long long const *__addr,
10411 int __scale)
10412 {
10413 return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
10414 __addr,
10415 (__v2di) __index,
10416 __mask, __scale);
10417 }
10418
10419 extern __inline void
10420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421 _mm256_i32scatter_ps (float *__addr, __m256i __index,
10422 __m256 __v1, const int __scale)
10423 {
10424 __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
10425 (__v8si) __index, (__v8sf) __v1,
10426 __scale);
10427 }
10428
10429 extern __inline void
10430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10431 _mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10432 __m256i __index, __m256 __v1,
10433 const int __scale)
10434 {
10435 __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
10436 (__v8sf) __v1, __scale);
10437 }
10438
10439 extern __inline void
10440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10441 _mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10442 const int __scale)
10443 {
10444 __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
10445 (__v4si) __index, (__v4sf) __v1,
10446 __scale);
10447 }
10448
10449 extern __inline void
10450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10451 _mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
10452 __m128i __index, __m128 __v1,
10453 const int __scale)
10454 {
10455 __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
10456 (__v4sf) __v1, __scale);
10457 }
10458
10459 extern __inline void
10460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10461 _mm256_i32scatter_pd (double *__addr, __m128i __index,
10462 __m256d __v1, const int __scale)
10463 {
10464 __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
10465 (__v4si) __index, (__v4df) __v1,
10466 __scale);
10467 }
10468
10469 extern __inline void
10470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471 _mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10472 __m128i __index, __m256d __v1,
10473 const int __scale)
10474 {
10475 __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
10476 (__v4df) __v1, __scale);
10477 }
10478
10479 extern __inline void
10480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10481 _mm_i32scatter_pd (double *__addr, __m128i __index,
10482 __m128d __v1, const int __scale)
10483 {
10484 __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
10485 (__v4si) __index, (__v2df) __v1,
10486 __scale);
10487 }
10488
10489 extern __inline void
10490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10491 _mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
10492 __m128i __index, __m128d __v1,
10493 const int __scale)
10494 {
10495 __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
10496 (__v2df) __v1, __scale);
10497 }
10498
10499 extern __inline void
10500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10501 _mm256_i64scatter_ps (float *__addr, __m256i __index,
10502 __m128 __v1, const int __scale)
10503 {
10504 __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
10505 (__v4di) __index, (__v4sf) __v1,
10506 __scale);
10507 }
10508
10509 extern __inline void
10510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10511 _mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10512 __m256i __index, __m128 __v1,
10513 const int __scale)
10514 {
10515 __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
10516 (__v4sf) __v1, __scale);
10517 }
10518
10519 extern __inline void
10520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10521 _mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
10522 const int __scale)
10523 {
10524 __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
10525 (__v2di) __index, (__v4sf) __v1,
10526 __scale);
10527 }
10528
10529 extern __inline void
10530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10531 _mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
10532 __m128i __index, __m128 __v1,
10533 const int __scale)
10534 {
10535 __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
10536 (__v4sf) __v1, __scale);
10537 }
10538
10539 extern __inline void
10540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10541 _mm256_i64scatter_pd (double *__addr, __m256i __index,
10542 __m256d __v1, const int __scale)
10543 {
10544 __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
10545 (__v4di) __index, (__v4df) __v1,
10546 __scale);
10547 }
10548
10549 extern __inline void
10550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10551 _mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10552 __m256i __index, __m256d __v1,
10553 const int __scale)
10554 {
10555 __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
10556 (__v4df) __v1, __scale);
10557 }
10558
10559 extern __inline void
10560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10561 _mm_i64scatter_pd (double *__addr, __m128i __index,
10562 __m128d __v1, const int __scale)
10563 {
10564 __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
10565 (__v2di) __index, (__v2df) __v1,
10566 __scale);
10567 }
10568
10569 extern __inline void
10570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10571 _mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
10572 __m128i __index, __m128d __v1,
10573 const int __scale)
10574 {
10575 __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
10576 (__v2df) __v1, __scale);
10577 }
10578
10579 extern __inline void
10580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10581 _mm256_i32scatter_epi32 (int *__addr, __m256i __index,
10582 __m256i __v1, const int __scale)
10583 {
10584 __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
10585 (__v8si) __index, (__v8si) __v1,
10586 __scale);
10587 }
10588
10589 extern __inline void
10590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10591 _mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10592 __m256i __index, __m256i __v1,
10593 const int __scale)
10594 {
10595 __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
10596 (__v8si) __v1, __scale);
10597 }
10598
10599 extern __inline void
10600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10601 _mm_i32scatter_epi32 (int *__addr, __m128i __index,
10602 __m128i __v1, const int __scale)
10603 {
10604 __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
10605 (__v4si) __index, (__v4si) __v1,
10606 __scale);
10607 }
10608
10609 extern __inline void
10610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10611 _mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
10612 __m128i __index, __m128i __v1,
10613 const int __scale)
10614 {
10615 __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
10616 (__v4si) __v1, __scale);
10617 }
10618
10619 extern __inline void
10620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10621 _mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
10622 __m256i __v1, const int __scale)
10623 {
10624 __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
10625 (__v4si) __index, (__v4di) __v1,
10626 __scale);
10627 }
10628
10629 extern __inline void
10630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10631 _mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10632 __m128i __index, __m256i __v1,
10633 const int __scale)
10634 {
10635 __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
10636 (__v4di) __v1, __scale);
10637 }
10638
10639 extern __inline void
10640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10641 _mm_i32scatter_epi64 (long long *__addr, __m128i __index,
10642 __m128i __v1, const int __scale)
10643 {
10644 __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
10645 (__v4si) __index, (__v2di) __v1,
10646 __scale);
10647 }
10648
10649 extern __inline void
10650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10651 _mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
10652 __m128i __index, __m128i __v1,
10653 const int __scale)
10654 {
10655 __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
10656 (__v2di) __v1, __scale);
10657 }
10658
10659 extern __inline void
10660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10661 _mm256_i64scatter_epi32 (int *__addr, __m256i __index,
10662 __m128i __v1, const int __scale)
10663 {
10664 __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
10665 (__v4di) __index, (__v4si) __v1,
10666 __scale);
10667 }
10668
10669 extern __inline void
10670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10671 _mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10672 __m256i __index, __m128i __v1,
10673 const int __scale)
10674 {
10675 __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
10676 (__v4si) __v1, __scale);
10677 }
10678
10679 extern __inline void
10680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10681 _mm_i64scatter_epi32 (int *__addr, __m128i __index,
10682 __m128i __v1, const int __scale)
10683 {
10684 __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
10685 (__v2di) __index, (__v4si) __v1,
10686 __scale);
10687 }
10688
10689 extern __inline void
10690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10691 _mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
10692 __m128i __index, __m128i __v1,
10693 const int __scale)
10694 {
10695 __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
10696 (__v4si) __v1, __scale);
10697 }
10698
10699 extern __inline void
10700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10701 _mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
10702 __m256i __v1, const int __scale)
10703 {
10704 __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
10705 (__v4di) __index, (__v4di) __v1,
10706 __scale);
10707 }
10708
10709 extern __inline void
10710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10711 _mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10712 __m256i __index, __m256i __v1,
10713 const int __scale)
10714 {
10715 __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
10716 (__v4di) __v1, __scale);
10717 }
10718
10719 extern __inline void
10720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10721 _mm_i64scatter_epi64 (long long *__addr, __m128i __index,
10722 __m128i __v1, const int __scale)
10723 {
10724 __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
10725 (__v2di) __index, (__v2di) __v1,
10726 __scale);
10727 }
10728
10729 extern __inline void
10730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10731 _mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
10732 __m128i __index, __m128i __v1,
10733 const int __scale)
10734 {
10735 __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
10736 (__v2di) __v1, __scale);
10737 }
10738
10739 extern __inline __m256i
10740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10741 _mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10742 _MM_PERM_ENUM __mask)
10743 {
10744 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10745 (__v8si) __W,
10746 (__mmask8) __U);
10747 }
10748
10749 extern __inline __m256i
10750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10751 _mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
10752 _MM_PERM_ENUM __mask)
10753 {
10754 return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
10755 (__v8si)
10756 _mm256_setzero_si256 (),
10757 (__mmask8) __U);
10758 }
10759
10760 extern __inline __m128i
10761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10762 _mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10763 _MM_PERM_ENUM __mask)
10764 {
10765 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10766 (__v4si) __W,
10767 (__mmask8) __U);
10768 }
10769
10770 extern __inline __m128i
10771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10772 _mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
10773 _MM_PERM_ENUM __mask)
10774 {
10775 return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
10776 (__v4si)
10777 _mm_setzero_si128 (),
10778 (__mmask8) __U);
10779 }
10780
10781 extern __inline __m256i
10782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10783 _mm256_rol_epi32 (__m256i __A, const int __B)
10784 {
10785 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10786 (__v8si)
10787 _mm256_setzero_si256 (),
10788 (__mmask8) -1);
10789 }
10790
10791 extern __inline __m256i
10792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10793 _mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10794 const int __B)
10795 {
10796 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10797 (__v8si) __W,
10798 (__mmask8) __U);
10799 }
10800
10801 extern __inline __m256i
10802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10803 _mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
10804 {
10805 return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
10806 (__v8si)
10807 _mm256_setzero_si256 (),
10808 (__mmask8) __U);
10809 }
10810
10811 extern __inline __m128i
10812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10813 _mm_rol_epi32 (__m128i __A, const int __B)
10814 {
10815 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10816 (__v4si)
10817 _mm_setzero_si128 (),
10818 (__mmask8) -1);
10819 }
10820
10821 extern __inline __m128i
10822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10823 _mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10824 const int __B)
10825 {
10826 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10827 (__v4si) __W,
10828 (__mmask8) __U);
10829 }
10830
10831 extern __inline __m128i
10832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10833 _mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
10834 {
10835 return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
10836 (__v4si)
10837 _mm_setzero_si128 (),
10838 (__mmask8) __U);
10839 }
10840
10841 extern __inline __m256i
10842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10843 _mm256_ror_epi32 (__m256i __A, const int __B)
10844 {
10845 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10846 (__v8si)
10847 _mm256_setzero_si256 (),
10848 (__mmask8) -1);
10849 }
10850
10851 extern __inline __m256i
10852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10853 _mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10854 const int __B)
10855 {
10856 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10857 (__v8si) __W,
10858 (__mmask8) __U);
10859 }
10860
10861 extern __inline __m256i
10862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10863 _mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
10864 {
10865 return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
10866 (__v8si)
10867 _mm256_setzero_si256 (),
10868 (__mmask8) __U);
10869 }
10870
10871 extern __inline __m128i
10872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10873 _mm_ror_epi32 (__m128i __A, const int __B)
10874 {
10875 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10876 (__v4si)
10877 _mm_setzero_si128 (),
10878 (__mmask8) -1);
10879 }
10880
10881 extern __inline __m128i
10882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10883 _mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10884 const int __B)
10885 {
10886 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10887 (__v4si) __W,
10888 (__mmask8) __U);
10889 }
10890
10891 extern __inline __m128i
10892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10893 _mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
10894 {
10895 return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
10896 (__v4si)
10897 _mm_setzero_si128 (),
10898 (__mmask8) __U);
10899 }
10900
10901 extern __inline __m256i
10902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10903 _mm256_rol_epi64 (__m256i __A, const int __B)
10904 {
10905 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10906 (__v4di)
10907 _mm256_setzero_si256 (),
10908 (__mmask8) -1);
10909 }
10910
10911 extern __inline __m256i
10912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10913 _mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10914 const int __B)
10915 {
10916 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10917 (__v4di) __W,
10918 (__mmask8) __U);
10919 }
10920
10921 extern __inline __m256i
10922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10923 _mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
10924 {
10925 return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
10926 (__v4di)
10927 _mm256_setzero_si256 (),
10928 (__mmask8) __U);
10929 }
10930
10931 extern __inline __m128i
10932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10933 _mm_rol_epi64 (__m128i __A, const int __B)
10934 {
10935 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10936 (__v2di)
10937 _mm_setzero_di (),
10938 (__mmask8) -1);
10939 }
10940
10941 extern __inline __m128i
10942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10943 _mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10944 const int __B)
10945 {
10946 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10947 (__v2di) __W,
10948 (__mmask8) __U);
10949 }
10950
10951 extern __inline __m128i
10952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953 _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
10954 {
10955 return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
10956 (__v2di)
10957 _mm_setzero_di (),
10958 (__mmask8) __U);
10959 }
10960
10961 extern __inline __m256i
10962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10963 _mm256_ror_epi64 (__m256i __A, const int __B)
10964 {
10965 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10966 (__v4di)
10967 _mm256_setzero_si256 (),
10968 (__mmask8) -1);
10969 }
10970
10971 extern __inline __m256i
10972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10973 _mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10974 const int __B)
10975 {
10976 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10977 (__v4di) __W,
10978 (__mmask8) __U);
10979 }
10980
10981 extern __inline __m256i
10982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10983 _mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
10984 {
10985 return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
10986 (__v4di)
10987 _mm256_setzero_si256 (),
10988 (__mmask8) __U);
10989 }
10990
10991 extern __inline __m128i
10992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10993 _mm_ror_epi64 (__m128i __A, const int __B)
10994 {
10995 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
10996 (__v2di)
10997 _mm_setzero_di (),
10998 (__mmask8) -1);
10999 }
11000
11001 extern __inline __m128i
11002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11003 _mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11004 const int __B)
11005 {
11006 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11007 (__v2di) __W,
11008 (__mmask8) __U);
11009 }
11010
11011 extern __inline __m128i
11012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11013 _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11014 {
11015 return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11016 (__v2di)
11017 _mm_setzero_di (),
11018 (__mmask8) __U);
11019 }
11020
11021 extern __inline __m128i
11022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11023 _mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11024 {
11025 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11026 (__v4si) __B, __imm,
11027 (__v4si)
11028 _mm_setzero_si128 (),
11029 (__mmask8) -1);
11030 }
11031
11032 extern __inline __m128i
11033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11034 _mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11035 __m128i __B, const int __imm)
11036 {
11037 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11038 (__v4si) __B, __imm,
11039 (__v4si) __W,
11040 (__mmask8) __U);
11041 }
11042
11043 extern __inline __m128i
11044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11045 _mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11046 const int __imm)
11047 {
11048 return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11049 (__v4si) __B, __imm,
11050 (__v4si)
11051 _mm_setzero_si128 (),
11052 (__mmask8) __U);
11053 }
11054
11055 extern __inline __m128i
11056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057 _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11058 {
11059 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11060 (__v2di) __B, __imm,
11061 (__v2di)
11062 _mm_setzero_di (),
11063 (__mmask8) -1);
11064 }
11065
11066 extern __inline __m128i
11067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068 _mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11069 __m128i __B, const int __imm)
11070 {
11071 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11072 (__v2di) __B, __imm,
11073 (__v2di) __W,
11074 (__mmask8) __U);
11075 }
11076
11077 extern __inline __m128i
11078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11079 _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11080 const int __imm)
11081 {
11082 return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11083 (__v2di) __B, __imm,
11084 (__v2di)
11085 _mm_setzero_di (),
11086 (__mmask8) __U);
11087 }
11088
11089 extern __inline __m256i
11090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11091 _mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11092 {
11093 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11094 (__v8si) __B, __imm,
11095 (__v8si)
11096 _mm256_setzero_si256 (),
11097 (__mmask8) -1);
11098 }
11099
11100 extern __inline __m256i
11101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11102 _mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11103 __m256i __B, const int __imm)
11104 {
11105 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11106 (__v8si) __B, __imm,
11107 (__v8si) __W,
11108 (__mmask8) __U);
11109 }
11110
11111 extern __inline __m256i
11112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11113 _mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11114 const int __imm)
11115 {
11116 return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11117 (__v8si) __B, __imm,
11118 (__v8si)
11119 _mm256_setzero_si256 (),
11120 (__mmask8) __U);
11121 }
11122
11123 extern __inline __m256i
11124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11125 _mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11126 {
11127 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11128 (__v4di) __B, __imm,
11129 (__v4di)
11130 _mm256_setzero_si256 (),
11131 (__mmask8) -1);
11132 }
11133
11134 extern __inline __m256i
11135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11136 _mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11137 __m256i __B, const int __imm)
11138 {
11139 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11140 (__v4di) __B, __imm,
11141 (__v4di) __W,
11142 (__mmask8) __U);
11143 }
11144
11145 extern __inline __m256i
11146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147 _mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11148 const int __imm)
11149 {
11150 return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11151 (__v4di) __B, __imm,
11152 (__v4di)
11153 _mm256_setzero_si256 (),
11154 (__mmask8) __U);
11155 }
11156
11157 extern __inline __m128i
11158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11159 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11160 const int __I)
11161 {
11162 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11163 (__v8hi) __W,
11164 (__mmask8) __U);
11165 }
11166
11167 extern __inline __m128i
11168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11170 {
11171 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11172 (__v8hi)
11173 _mm_setzero_hi (),
11174 (__mmask8) __U);
11175 }
11176
11177 extern __inline __m128i
11178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11179 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11180 const int __I)
11181 {
11182 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11183 (__v8hi) __W,
11184 (__mmask8) __U);
11185 }
11186
11187 extern __inline __m128i
11188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11189 _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11190 {
11191 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11192 (__v8hi)
11193 _mm_setzero_hi (),
11194 (__mmask8) __U);
11195 }
11196
11197 extern __inline __m256i
11198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11199 _mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11200 const int __imm)
11201 {
11202 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11203 (__v8si) __W,
11204 (__mmask8) __U);
11205 }
11206
11207 extern __inline __m256i
11208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11209 _mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11210 {
11211 return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11212 (__v8si)
11213 _mm256_setzero_si256 (),
11214 (__mmask8) __U);
11215 }
11216
11217 extern __inline __m128i
11218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11219 _mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11220 const int __imm)
11221 {
11222 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11223 (__v4si) __W,
11224 (__mmask8) __U);
11225 }
11226
11227 extern __inline __m128i
11228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11229 _mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11230 {
11231 return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11232 (__v4si)
11233 _mm_setzero_si128 (),
11234 (__mmask8) __U);
11235 }
11236
11237 extern __inline __m256i
11238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11239 _mm256_srai_epi64 (__m256i __A, const int __imm)
11240 {
11241 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11242 (__v4di)
11243 _mm256_setzero_si256 (),
11244 (__mmask8) -1);
11245 }
11246
11247 extern __inline __m256i
11248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11249 _mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11250 const int __imm)
11251 {
11252 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11253 (__v4di) __W,
11254 (__mmask8) __U);
11255 }
11256
11257 extern __inline __m256i
11258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11259 _mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11260 {
11261 return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11262 (__v4di)
11263 _mm256_setzero_si256 (),
11264 (__mmask8) __U);
11265 }
11266
11267 extern __inline __m128i
11268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11269 _mm_srai_epi64 (__m128i __A, const int __imm)
11270 {
11271 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11272 (__v2di)
11273 _mm_setzero_di (),
11274 (__mmask8) -1);
11275 }
11276
11277 extern __inline __m128i
11278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11279 _mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11280 const int __imm)
11281 {
11282 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11283 (__v2di) __W,
11284 (__mmask8) __U);
11285 }
11286
11287 extern __inline __m128i
11288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11289 _mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11290 {
11291 return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11292 (__v2di)
11293 _mm_setzero_si128 (),
11294 (__mmask8) __U);
11295 }
11296
11297 extern __inline __m128i
11298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11299 _mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11300 {
11301 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11302 (__v4si) __W,
11303 (__mmask8) __U);
11304 }
11305
11306 extern __inline __m128i
11307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11308 _mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11309 {
11310 return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11311 (__v4si)
11312 _mm_setzero_si128 (),
11313 (__mmask8) __U);
11314 }
11315
11316 extern __inline __m128i
11317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11318 _mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11319 {
11320 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11321 (__v2di) __W,
11322 (__mmask8) __U);
11323 }
11324
11325 extern __inline __m128i
11326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11327 _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
11328 {
11329 return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11330 (__v2di)
11331 _mm_setzero_di (),
11332 (__mmask8) __U);
11333 }
11334
11335 extern __inline __m256i
11336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11337 _mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11338 int __B)
11339 {
11340 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11341 (__v8si) __W,
11342 (__mmask8) __U);
11343 }
11344
11345 extern __inline __m256i
11346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11347 _mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
11348 {
11349 return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
11350 (__v8si)
11351 _mm256_setzero_si256 (),
11352 (__mmask8) __U);
11353 }
11354
11355 extern __inline __m256i
11356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11357 _mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11358 int __B)
11359 {
11360 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11361 (__v4di) __W,
11362 (__mmask8) __U);
11363 }
11364
11365 extern __inline __m256i
11366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11367 _mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
11368 {
11369 return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
11370 (__v4di)
11371 _mm256_setzero_si256 (),
11372 (__mmask8) __U);
11373 }
11374
11375 extern __inline __m256d
11376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11377 _mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
11378 const int __imm)
11379 {
11380 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11381 (__v4df) __W,
11382 (__mmask8) __U);
11383 }
11384
11385 extern __inline __m256d
11386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11387 _mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
11388 {
11389 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
11390 (__v4df)
11391 _mm256_setzero_pd (),
11392 (__mmask8) __U);
11393 }
11394
11395 extern __inline __m256d
11396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11397 _mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
11398 const int __C)
11399 {
11400 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11401 (__v4df) __W,
11402 (__mmask8) __U);
11403 }
11404
11405 extern __inline __m256d
11406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11407 _mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
11408 {
11409 return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
11410 (__v4df)
11411 _mm256_setzero_pd (),
11412 (__mmask8) __U);
11413 }
11414
11415 extern __inline __m128d
11416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11417 _mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
11418 const int __C)
11419 {
11420 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11421 (__v2df) __W,
11422 (__mmask8) __U);
11423 }
11424
11425 extern __inline __m128d
11426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11427 _mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
11428 {
11429 return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
11430 (__v2df)
11431 _mm_setzero_pd (),
11432 (__mmask8) __U);
11433 }
11434
11435 extern __inline __m256
11436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11437 _mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
11438 const int __C)
11439 {
11440 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11441 (__v8sf) __W,
11442 (__mmask8) __U);
11443 }
11444
11445 extern __inline __m256
11446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11447 _mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
11448 {
11449 return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
11450 (__v8sf)
11451 _mm256_setzero_ps (),
11452 (__mmask8) __U);
11453 }
11454
11455 extern __inline __m128
11456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11457 _mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
11458 const int __C)
11459 {
11460 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11461 (__v4sf) __W,
11462 (__mmask8) __U);
11463 }
11464
11465 extern __inline __m128
11466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11467 _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
11468 {
11469 return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
11470 (__v4sf)
11471 _mm_setzero_ps (),
11472 (__mmask8) __U);
11473 }
11474
11475 extern __inline __m256d
11476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
11478 {
11479 return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
11480 (__v4df) __W,
11481 (__mmask8) __U);
11482 }
11483
11484 extern __inline __m256
11485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11486 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
11487 {
11488 return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
11489 (__v8sf) __W,
11490 (__mmask8) __U);
11491 }
11492
11493 extern __inline __m256i
11494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11495 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
11496 {
11497 return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
11498 (__v4di) __W,
11499 (__mmask8) __U);
11500 }
11501
11502 extern __inline __m256i
11503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11504 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
11505 {
11506 return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
11507 (__v8si) __W,
11508 (__mmask8) __U);
11509 }
11510
11511 extern __inline __m128d
11512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11513 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
11514 {
11515 return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
11516 (__v2df) __W,
11517 (__mmask8) __U);
11518 }
11519
11520 extern __inline __m128
11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
11523 {
11524 return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
11525 (__v4sf) __W,
11526 (__mmask8) __U);
11527 }
11528
11529 extern __inline __m128i
11530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11531 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
11532 {
11533 return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
11534 (__v2di) __W,
11535 (__mmask8) __U);
11536 }
11537
11538 extern __inline __m128i
11539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11540 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
11541 {
11542 return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
11543 (__v4si) __W,
11544 (__mmask8) __U);
11545 }
11546
11547 extern __inline __mmask8
11548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11549 _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
11550 {
11551 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11552 (__v4di) __Y, __P,
11553 (__mmask8) -1);
11554 }
11555
11556 extern __inline __mmask8
11557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11558 _mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
11559 {
11560 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11561 (__v8si) __Y, __P,
11562 (__mmask8) -1);
11563 }
11564
11565 extern __inline __mmask8
11566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11567 _mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
11568 {
11569 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11570 (__v4di) __Y, __P,
11571 (__mmask8) -1);
11572 }
11573
11574 extern __inline __mmask8
11575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11576 _mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
11577 {
11578 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11579 (__v8si) __Y, __P,
11580 (__mmask8) -1);
11581 }
11582
11583 extern __inline __mmask8
11584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11585 _mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
11586 {
11587 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11588 (__v4df) __Y, __P,
11589 (__mmask8) -1);
11590 }
11591
11592 extern __inline __mmask8
11593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11594 _mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
11595 {
11596 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11597 (__v8sf) __Y, __P,
11598 (__mmask8) -1);
11599 }
11600
11601 extern __inline __mmask8
11602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11603 _mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11604 const int __P)
11605 {
11606 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
11607 (__v4di) __Y, __P,
11608 (__mmask8) __U);
11609 }
11610
11611 extern __inline __mmask8
11612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11613 _mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11614 const int __P)
11615 {
11616 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11617 (__v8si) __Y, __P,
11618 (__mmask8) __U);
11619 }
11620
11621 extern __inline __mmask8
11622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11623 _mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11624 const int __P)
11625 {
11626 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11627 (__v4di) __Y, __P,
11628 (__mmask8) __U);
11629 }
11630
11631 extern __inline __mmask8
11632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11633 _mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
11634 const int __P)
11635 {
11636 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11637 (__v8si) __Y, __P,
11638 (__mmask8) __U);
11639 }
11640
11641 extern __inline __mmask8
11642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11643 _mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
11644 const int __P)
11645 {
11646 return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
11647 (__v4df) __Y, __P,
11648 (__mmask8) __U);
11649 }
11650
11651 extern __inline __mmask8
11652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653 _mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
11654 const int __P)
11655 {
11656 return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
11657 (__v8sf) __Y, __P,
11658 (__mmask8) __U);
11659 }
11660
11661 extern __inline __mmask8
11662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11663 _mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
11664 {
11665 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11666 (__v2di) __Y, __P,
11667 (__mmask8) -1);
11668 }
11669
11670 extern __inline __mmask8
11671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672 _mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
11673 {
11674 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11675 (__v4si) __Y, __P,
11676 (__mmask8) -1);
11677 }
11678
11679 extern __inline __mmask8
11680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11681 _mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
11682 {
11683 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11684 (__v2di) __Y, __P,
11685 (__mmask8) -1);
11686 }
11687
11688 extern __inline __mmask8
11689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11690 _mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
11691 {
11692 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11693 (__v4si) __Y, __P,
11694 (__mmask8) -1);
11695 }
11696
11697 extern __inline __mmask8
11698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11699 _mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
11700 {
11701 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11702 (__v2df) __Y, __P,
11703 (__mmask8) -1);
11704 }
11705
11706 extern __inline __mmask8
11707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11708 _mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
11709 {
11710 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11711 (__v4sf) __Y, __P,
11712 (__mmask8) -1);
11713 }
11714
11715 extern __inline __mmask8
11716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11717 _mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11718 const int __P)
11719 {
11720 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
11721 (__v2di) __Y, __P,
11722 (__mmask8) __U);
11723 }
11724
11725 extern __inline __mmask8
11726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11727 _mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11728 const int __P)
11729 {
11730 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
11731 (__v4si) __Y, __P,
11732 (__mmask8) __U);
11733 }
11734
11735 extern __inline __mmask8
11736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11737 _mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11738 const int __P)
11739 {
11740 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
11741 (__v2di) __Y, __P,
11742 (__mmask8) __U);
11743 }
11744
11745 extern __inline __mmask8
11746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11747 _mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
11748 const int __P)
11749 {
11750 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
11751 (__v4si) __Y, __P,
11752 (__mmask8) __U);
11753 }
11754
11755 extern __inline __mmask8
11756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11757 _mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
11758 const int __P)
11759 {
11760 return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
11761 (__v2df) __Y, __P,
11762 (__mmask8) __U);
11763 }
11764
11765 extern __inline __mmask8
11766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11767 _mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
11768 const int __P)
11769 {
11770 return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
11771 (__v4sf) __Y, __P,
11772 (__mmask8) __U);
11773 }
11774
11775 extern __inline __m256d
11776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11777 _mm256_permutex_pd (__m256d __X, const int __M)
11778 {
11779 return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
11780 (__v4df)
11781 _mm256_undefined_pd (),
11782 (__mmask8) -1);
11783 }
11784
11785 extern __inline __mmask8
11786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11787 _mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11788 {
11789 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11790 (__v8si) __Y, 4,
11791 (__mmask8) __M);
11792 }
11793
11794 extern __inline __mmask8
11795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11796 _mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
11797 {
11798 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11799 (__v8si) __Y, 4,
11800 (__mmask8) - 1);
11801 }
11802
11803 extern __inline __mmask8
11804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11805 _mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11806 {
11807 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11808 (__v8si) __Y, 1,
11809 (__mmask8) __M);
11810 }
11811
11812 extern __inline __mmask8
11813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814 _mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
11815 {
11816 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11817 (__v8si) __Y, 1,
11818 (__mmask8) - 1);
11819 }
11820
11821 extern __inline __mmask8
11822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11823 _mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11824 {
11825 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11826 (__v8si) __Y, 5,
11827 (__mmask8) __M);
11828 }
11829
11830 extern __inline __mmask8
11831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11832 _mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
11833 {
11834 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11835 (__v8si) __Y, 5,
11836 (__mmask8) - 1);
11837 }
11838
11839 extern __inline __mmask8
11840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11841 _mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11842 {
11843 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11844 (__v8si) __Y, 2,
11845 (__mmask8) __M);
11846 }
11847
11848 extern __inline __mmask8
11849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11850 _mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
11851 {
11852 return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
11853 (__v8si) __Y, 2,
11854 (__mmask8) - 1);
11855 }
11856
11857 extern __inline __mmask8
11858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11859 _mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11860 {
11861 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11862 (__v4di) __Y, 4,
11863 (__mmask8) __M);
11864 }
11865
11866 extern __inline __mmask8
11867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868 _mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
11869 {
11870 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11871 (__v4di) __Y, 4,
11872 (__mmask8) - 1);
11873 }
11874
11875 extern __inline __mmask8
11876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11877 _mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11878 {
11879 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11880 (__v4di) __Y, 1,
11881 (__mmask8) __M);
11882 }
11883
11884 extern __inline __mmask8
11885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11886 _mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
11887 {
11888 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11889 (__v4di) __Y, 1,
11890 (__mmask8) - 1);
11891 }
11892
11893 extern __inline __mmask8
11894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11895 _mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11896 {
11897 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11898 (__v4di) __Y, 5,
11899 (__mmask8) __M);
11900 }
11901
11902 extern __inline __mmask8
11903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11904 _mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
11905 {
11906 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11907 (__v4di) __Y, 5,
11908 (__mmask8) - 1);
11909 }
11910
11911 extern __inline __mmask8
11912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11913 _mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11914 {
11915 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11916 (__v4di) __Y, 2,
11917 (__mmask8) __M);
11918 }
11919
11920 extern __inline __mmask8
11921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11922 _mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
11923 {
11924 return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
11925 (__v4di) __Y, 2,
11926 (__mmask8) - 1);
11927 }
11928
11929 extern __inline __mmask8
11930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11931 _mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11932 {
11933 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11934 (__v8si) __Y, 4,
11935 (__mmask8) __M);
11936 }
11937
11938 extern __inline __mmask8
11939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940 _mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
11941 {
11942 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11943 (__v8si) __Y, 4,
11944 (__mmask8) - 1);
11945 }
11946
11947 extern __inline __mmask8
11948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11949 _mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11950 {
11951 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11952 (__v8si) __Y, 1,
11953 (__mmask8) __M);
11954 }
11955
11956 extern __inline __mmask8
11957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958 _mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
11959 {
11960 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11961 (__v8si) __Y, 1,
11962 (__mmask8) - 1);
11963 }
11964
11965 extern __inline __mmask8
11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967 _mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11968 {
11969 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11970 (__v8si) __Y, 5,
11971 (__mmask8) __M);
11972 }
11973
11974 extern __inline __mmask8
11975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11976 _mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
11977 {
11978 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11979 (__v8si) __Y, 5,
11980 (__mmask8) - 1);
11981 }
11982
11983 extern __inline __mmask8
11984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11985 _mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
11986 {
11987 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11988 (__v8si) __Y, 2,
11989 (__mmask8) __M);
11990 }
11991
11992 extern __inline __mmask8
11993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11994 _mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
11995 {
11996 return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
11997 (__v8si) __Y, 2,
11998 (__mmask8) - 1);
11999 }
12000
12001 extern __inline __mmask8
12002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12003 _mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12004 {
12005 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12006 (__v4di) __Y, 4,
12007 (__mmask8) __M);
12008 }
12009
12010 extern __inline __mmask8
12011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12012 _mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
12013 {
12014 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12015 (__v4di) __Y, 4,
12016 (__mmask8) - 1);
12017 }
12018
12019 extern __inline __mmask8
12020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12021 _mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12022 {
12023 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12024 (__v4di) __Y, 1,
12025 (__mmask8) __M);
12026 }
12027
12028 extern __inline __mmask8
12029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12030 _mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
12031 {
12032 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12033 (__v4di) __Y, 1,
12034 (__mmask8) - 1);
12035 }
12036
12037 extern __inline __mmask8
12038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12039 _mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12040 {
12041 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12042 (__v4di) __Y, 5,
12043 (__mmask8) __M);
12044 }
12045
12046 extern __inline __mmask8
12047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12048 _mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
12049 {
12050 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12051 (__v4di) __Y, 5,
12052 (__mmask8) - 1);
12053 }
12054
12055 extern __inline __mmask8
12056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12057 _mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
12058 {
12059 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12060 (__v4di) __Y, 2,
12061 (__mmask8) __M);
12062 }
12063
12064 extern __inline __mmask8
12065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12066 _mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
12067 {
12068 return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12069 (__v4di) __Y, 2,
12070 (__mmask8) - 1);
12071 }
12072
12073 extern __inline __mmask8
12074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12075 _mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12076 {
12077 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12078 (__v4si) __Y, 4,
12079 (__mmask8) __M);
12080 }
12081
12082 extern __inline __mmask8
12083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12084 _mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
12085 {
12086 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12087 (__v4si) __Y, 4,
12088 (__mmask8) - 1);
12089 }
12090
12091 extern __inline __mmask8
12092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12093 _mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12094 {
12095 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12096 (__v4si) __Y, 1,
12097 (__mmask8) __M);
12098 }
12099
12100 extern __inline __mmask8
12101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12102 _mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
12103 {
12104 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12105 (__v4si) __Y, 1,
12106 (__mmask8) - 1);
12107 }
12108
12109 extern __inline __mmask8
12110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12111 _mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12112 {
12113 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12114 (__v4si) __Y, 5,
12115 (__mmask8) __M);
12116 }
12117
12118 extern __inline __mmask8
12119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12120 _mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
12121 {
12122 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12123 (__v4si) __Y, 5,
12124 (__mmask8) - 1);
12125 }
12126
12127 extern __inline __mmask8
12128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12129 _mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12130 {
12131 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12132 (__v4si) __Y, 2,
12133 (__mmask8) __M);
12134 }
12135
12136 extern __inline __mmask8
12137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12138 _mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
12139 {
12140 return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12141 (__v4si) __Y, 2,
12142 (__mmask8) - 1);
12143 }
12144
12145 extern __inline __mmask8
12146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12147 _mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12148 {
12149 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12150 (__v2di) __Y, 4,
12151 (__mmask8) __M);
12152 }
12153
12154 extern __inline __mmask8
12155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12156 _mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
12157 {
12158 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12159 (__v2di) __Y, 4,
12160 (__mmask8) - 1);
12161 }
12162
12163 extern __inline __mmask8
12164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12165 _mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12166 {
12167 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12168 (__v2di) __Y, 1,
12169 (__mmask8) __M);
12170 }
12171
12172 extern __inline __mmask8
12173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12174 _mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
12175 {
12176 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12177 (__v2di) __Y, 1,
12178 (__mmask8) - 1);
12179 }
12180
12181 extern __inline __mmask8
12182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12183 _mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12184 {
12185 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12186 (__v2di) __Y, 5,
12187 (__mmask8) __M);
12188 }
12189
12190 extern __inline __mmask8
12191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12192 _mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
12193 {
12194 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12195 (__v2di) __Y, 5,
12196 (__mmask8) - 1);
12197 }
12198
12199 extern __inline __mmask8
12200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12201 _mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12202 {
12203 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12204 (__v2di) __Y, 2,
12205 (__mmask8) __M);
12206 }
12207
12208 extern __inline __mmask8
12209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12210 _mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
12211 {
12212 return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12213 (__v2di) __Y, 2,
12214 (__mmask8) - 1);
12215 }
12216
12217 extern __inline __mmask8
12218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12219 _mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12220 {
12221 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12222 (__v4si) __Y, 4,
12223 (__mmask8) __M);
12224 }
12225
12226 extern __inline __mmask8
12227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12228 _mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
12229 {
12230 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12231 (__v4si) __Y, 4,
12232 (__mmask8) - 1);
12233 }
12234
12235 extern __inline __mmask8
12236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12237 _mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12238 {
12239 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12240 (__v4si) __Y, 1,
12241 (__mmask8) __M);
12242 }
12243
12244 extern __inline __mmask8
12245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12246 _mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
12247 {
12248 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12249 (__v4si) __Y, 1,
12250 (__mmask8) - 1);
12251 }
12252
12253 extern __inline __mmask8
12254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12255 _mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12256 {
12257 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12258 (__v4si) __Y, 5,
12259 (__mmask8) __M);
12260 }
12261
12262 extern __inline __mmask8
12263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12264 _mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
12265 {
12266 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12267 (__v4si) __Y, 5,
12268 (__mmask8) - 1);
12269 }
12270
12271 extern __inline __mmask8
12272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12273 _mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12274 {
12275 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12276 (__v4si) __Y, 2,
12277 (__mmask8) __M);
12278 }
12279
12280 extern __inline __mmask8
12281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282 _mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
12283 {
12284 return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12285 (__v4si) __Y, 2,
12286 (__mmask8) - 1);
12287 }
12288
12289 extern __inline __mmask8
12290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12291 _mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12292 {
12293 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12294 (__v2di) __Y, 4,
12295 (__mmask8) __M);
12296 }
12297
12298 extern __inline __mmask8
12299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12300 _mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
12301 {
12302 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12303 (__v2di) __Y, 4,
12304 (__mmask8) - 1);
12305 }
12306
12307 extern __inline __mmask8
12308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12309 _mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12310 {
12311 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12312 (__v2di) __Y, 1,
12313 (__mmask8) __M);
12314 }
12315
12316 extern __inline __mmask8
12317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12318 _mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
12319 {
12320 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12321 (__v2di) __Y, 1,
12322 (__mmask8) - 1);
12323 }
12324
12325 extern __inline __mmask8
12326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12327 _mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12328 {
12329 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12330 (__v2di) __Y, 5,
12331 (__mmask8) __M);
12332 }
12333
12334 extern __inline __mmask8
12335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12336 _mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
12337 {
12338 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12339 (__v2di) __Y, 5,
12340 (__mmask8) - 1);
12341 }
12342
12343 extern __inline __mmask8
12344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345 _mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
12346 {
12347 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12348 (__v2di) __Y, 2,
12349 (__mmask8) __M);
12350 }
12351
12352 extern __inline __mmask8
12353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12354 _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
12355 {
12356 return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12357 (__v2di) __Y, 2,
12358 (__mmask8) - 1);
12359 }
12360
12361 #else
/* Macro form: __builtin_ia32_permdf256_mask on X with immediate M, an
   _mm256_undefined_pd () third operand and mask -1.  */
#define _mm256_permutex_pd(X, M) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
    (__v4df)(__m256d) (X), \
    (int) (M), \
    (__v4df)(__m256d) _mm256_undefined_pd (), \
    (__mmask8) -1))
12366
/* Macro form: __builtin_ia32_permdi256_mask on X with immediate I, a
   zero vector as third operand and mask M.  */
#define _mm256_maskz_permutex_epi64(M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
    (__v4di)(__m256i) (X), \
    (int) (I), \
    (__v4di)(__m256i) (_mm256_setzero_si256 ()), \
    (__mmask8) (M)))
12373
/* Macro form: __builtin_ia32_permdi256_mask on X with immediate I, W as
   third operand and mask M.  */
#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) __builtin_ia32_permdi256_mask ( \
    (__v4di)(__m256i) (X), \
    (int) (I), \
    (__v4di)(__m256i) (W), \
    (__mmask8) (M)))
12379
/* Macro form: __builtin_ia32_insertf32x4_256_mask on X, Y and immediate
   C, with a zero vector and mask -1 as trailing operands.  */
#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v4sf)(__m128) (Y), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) -1))
12385
/* Macro form: __builtin_ia32_insertf32x4_256_mask on X, Y and immediate
   C, with W and mask U as trailing operands.  */
#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v4sf)(__m128) (Y), \
    (int) (C), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))
12391
/* Macro form: __builtin_ia32_insertf32x4_256_mask on X, Y and immediate
   C, with a zero vector and mask U as trailing operands.  */
#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_insertf32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v4sf)(__m128) (Y), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))
12397
/* Macro form: __builtin_ia32_inserti32x4_256_mask on X, Y and immediate
   C, with a zero vector and mask -1 as trailing operands.  */
#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (__v4si)(__m128i) (Y), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))
12403
/* Macro form: __builtin_ia32_inserti32x4_256_mask on X, Y and immediate
   C, with W and mask U as trailing operands.  */
#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (__v4si)(__m128i) (Y), \
    (int) (C), \
    (__v8si)(__m256i) (W), \
    (__mmask8) (U)))
12409
/* Macro form: __builtin_ia32_inserti32x4_256_mask on X, Y and immediate
   C, with a zero vector and mask U as trailing operands.  */
#define _mm256_maskz_inserti32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (__v4si)(__m128i) (Y), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12415
/* Macro form: __builtin_ia32_extractf32x4_256_mask on X with immediate
   C, a zero vector and mask -1.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (C), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) -1))
12421
/* Macro form: __builtin_ia32_extractf32x4_256_mask on X with immediate
   C, W as third operand and mask U.  */
#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (C), \
    (__v4sf)(__m128) (W), \
    (__mmask8) (U)))
12427
/* Macro form: __builtin_ia32_extractf32x4_256_mask on X with immediate
   C, a zero vector and mask U.  */
#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (C), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) (U)))
12433
/* Macro form: __builtin_ia32_extracti32x4_256_mask on X with immediate
   C, a zero vector and mask -1.  */
#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v4si)(__m128i) _mm_setzero_si128 (), \
    (__mmask8) -1))
12437
/* Macro form: __builtin_ia32_extracti32x4_256_mask on X with immediate
   C, W as third operand and mask U.  */
#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v4si)(__m128i) (W), \
    (__mmask8) (U)))
12441
/* Macro form: __builtin_ia32_extracti32x4_256_mask on X with immediate
   C, a zero vector and mask U.  */
#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v4si)(__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))
12445
/* Macro form: __builtin_ia32_shuf_i64x2_256_mask on X, Y and immediate
   C, with a zero vector and mask -1 as trailing operands.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
    (__v4di)(__m256i) (X), \
    (__v4di)(__m256i) (Y), \
    (int) (C), \
    (__v4di)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))
12451
/* Macro form: __builtin_ia32_shuf_i64x2_256_mask on X, Y and immediate
   C, with W and mask U as trailing operands.  */
#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
    (__v4di)(__m256i) (X), \
    (__v4di)(__m256i) (Y), \
    (int) (C), \
    (__v4di)(__m256i) (W), \
    (__mmask8) (U)))
12457
/* Macro form: __builtin_ia32_shuf_i64x2_256_mask on X, Y and immediate
   C, with a zero vector and mask U as trailing operands.  */
#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ( \
    (__v4di)(__m256i) (X), \
    (__v4di)(__m256i) (Y), \
    (int) (C), \
    (__v4di)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12463
/* Macro form: __builtin_ia32_shuf_i32x4_256_mask on X, Y and immediate
   C, with a zero vector and mask -1 as trailing operands.  */
#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (__v8si)(__m256i) (Y), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) -1))
12469
/* Macro form: __builtin_ia32_shuf_i32x4_256_mask on X, Y and immediate
   C, with W and mask U as trailing operands.  */
#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (__v8si)(__m256i) (Y), \
    (int) (C), \
    (__v8si)(__m256i) (W), \
    (__mmask8) (U)))
12475
/* Macro form: __builtin_ia32_shuf_i32x4_256_mask on X, Y and immediate
   C, with a zero vector and mask U as trailing operands.  */
#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ( \
    (__v8si)(__m256i) (X), \
    (__v8si)(__m256i) (Y), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12481
/* Macro form: __builtin_ia32_shuf_f64x2_256_mask on X, Y and immediate
   C, with a zero vector and mask -1 as trailing operands.  */
#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (int) (C), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) -1))
12487
/* Macro form: __builtin_ia32_shuf_f64x2_256_mask on X, Y and immediate
   C, with W and mask U as trailing operands.  */
#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (int) (C), \
    (__v4df)(__m256d) (W), \
    (__mmask8) (U)))
12493
/* Macro form: __builtin_ia32_shuf_f64x2_256_mask on X, Y and immediate
   C, with a zero vector and mask U as trailing operands.  */
#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (int) (C), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))
12499
/* Macro form: __builtin_ia32_shuf_f32x4_256_mask on X, Y and immediate
   C, with a zero vector and mask -1 as trailing operands.  */
#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) -1))
12505
/* Macro form: __builtin_ia32_shuf_f32x4_256_mask on X, Y and immediate
   C, with W and mask U as trailing operands.  */
#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (int) (C), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))
12511
/* Macro form: __builtin_ia32_shuf_f32x4_256_mask on X, Y and immediate
   C, with a zero vector and mask U as trailing operands.  */
#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))
12517
/* Macro form: __builtin_ia32_shufpd256_mask on A, B and immediate C,
   with W and mask U as trailing operands.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ( \
    (__v4df)(__m256d) (A), \
    (__v4df)(__m256d) (B), \
    (int) (C), \
    (__v4df)(__m256d) (W), \
    (__mmask8) (U)))
12523
/* Macro form: __builtin_ia32_shufpd256_mask on A, B and immediate C,
   with a zero vector and mask U as trailing operands.  */
#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ( \
    (__v4df)(__m256d) (A), \
    (__v4df)(__m256d) (B), \
    (int) (C), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))
12529
/* Macro form: __builtin_ia32_shufpd128_mask on A, B and immediate C,
   with W and mask U as trailing operands.  */
#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ( \
    (__v2df)(__m128d) (A), \
    (__v2df)(__m128d) (B), \
    (int) (C), \
    (__v2df)(__m128d) (W), \
    (__mmask8) (U)))
12535
/* Macro form: __builtin_ia32_shufpd128_mask on A, B and immediate C,
   with a zero vector and mask U as trailing operands.  */
#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ( \
    (__v2df)(__m128d) (A), \
    (__v2df)(__m128d) (B), \
    (int) (C), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) (U)))
12541
/* Macro form: __builtin_ia32_shufps256_mask on A, B and immediate C,
   with W and mask U as trailing operands.  */
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
    (__v8sf)(__m256) (A), \
    (__v8sf)(__m256) (B), \
    (int) (C), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))
12547
/* Macro form: __builtin_ia32_shufps256_mask on A, B and immediate C,
   with a zero vector and mask U as trailing operands.  */
#define _mm256_maskz_shuffle_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
    (__v8sf)(__m256) (A), \
    (__v8sf)(__m256) (B), \
    (int) (C), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))
12553
/* Macro form: __builtin_ia32_shufps128_mask on A, B and immediate C,
   with W and mask U as trailing operands.  */
#define _mm_mask_shuffle_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
    (__v4sf)(__m128) (A), \
    (__v4sf)(__m128) (B), \
    (int) (C), \
    (__v4sf)(__m128) (W), \
    (__mmask8) (U)))
12559
/* Macro form: __builtin_ia32_shufps128_mask on A, B and immediate C,
   with a zero vector and mask U as trailing operands.  */
#define _mm_maskz_shuffle_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
    (__v4sf)(__m128) (A), \
    (__v4sf)(__m128) (B), \
    (int) (C), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) (U)))
12565
/* Macro form: __builtin_ia32_fixupimmpd256_mask on X, Y, Z and immediate
   C with mask -1.  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (__v4di)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (-1)))
12571
/* Macro form: __builtin_ia32_fixupimmpd256_mask on X, Y, Z and immediate
   C with mask U.  */
#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (__v4di)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12577
/* Macro form: __builtin_ia32_fixupimmpd256_maskz on X, Y, Z and
   immediate C with mask U.  */
#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_maskz ( \
    (__v4df)(__m256d) (X), \
    (__v4df)(__m256d) (Y), \
    (__v4di)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12583
/* Macro form: __builtin_ia32_fixupimmps256_mask on X, Y, Z and immediate
   C with mask -1.  */
#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (__v8si)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (-1)))
12589
12590
/* Macro form: __builtin_ia32_fixupimmps256_mask on X, Y, Z and immediate
   C with mask U.  */
#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (__v8si)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12596
/* Macro form: __builtin_ia32_fixupimmps256_maskz on X, Y, Z and
   immediate C with mask U.  */
#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_maskz ( \
    (__v8sf)(__m256) (X), \
    (__v8sf)(__m256) (Y), \
    (__v8si)(__m256i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12602
/* Macro form: __builtin_ia32_fixupimmpd128_mask on X, Y, Z and immediate
   C with mask -1.  */
#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (__v2df)(__m128d) (Y), \
    (__v2di)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (-1)))
12608
12609
/* Macro form: __builtin_ia32_fixupimmpd128_mask on X, Y, Z and immediate
   C with mask U.  */
#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (__v2df)(__m128d) (Y), \
    (__v2di)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12615
/* Macro form: __builtin_ia32_fixupimmpd128_maskz on X, Y, Z and
   immediate C with mask U.  */
#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_maskz ( \
    (__v2df)(__m128d) (X), \
    (__v2df)(__m128d) (Y), \
    (__v2di)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12621
/* Macro form: __builtin_ia32_fixupimmps128_mask on X, Y, Z and immediate
   C with mask -1.  */
#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
    (__v4sf)(__m128) (X), \
    (__v4sf)(__m128) (Y), \
    (__v4si)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (-1)))
12627
/* Macro form: __builtin_ia32_fixupimmps128_mask on X, Y, Z and immediate
   C with mask U.  */
#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
    (__v4sf)(__m128) (X), \
    (__v4sf)(__m128) (Y), \
    (__v4si)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12633
/* Macro form: __builtin_ia32_fixupimmps128_maskz on X, Y, Z and
   immediate C with mask U.  */
#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_maskz ( \
    (__v4sf)(__m128) (X), \
    (__v4sf)(__m128) (Y), \
    (__v4si)(__m128i) (Z), \
    (int) (C), \
    (__mmask8) (U)))
12639
/* Macro form: __builtin_ia32_psrldi256_mask on A with immediate B, W as
   third operand and mask U.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
    (__v8si)(__m256i) (A), \
    (int) (B), \
    (__v8si)(__m256i) (W), \
    (__mmask8) (U)))
12643
/* Macro form: __builtin_ia32_psrldi256_mask on A with immediate B, a
   zero vector as third operand and mask U.  */
#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
    (__v8si)(__m256i) (A), \
    (int) (B), \
    (__v8si) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12647
/* Macro form: __builtin_ia32_psrldi128_mask on A with immediate B, W as
   third operand and mask U.  */
#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
    (__v4si)(__m128i) (A), \
    (int) (B), \
    (__v4si)(__m128i) (W), \
    (__mmask8) (U)))
12651
/* Macro form: __builtin_ia32_psrldi128_mask on A with immediate B, a
   zero vector as third operand and mask U.  */
#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
    (__v4si)(__m128i) (A), \
    (int) (B), \
    (__v4si) _mm_setzero_si128 (), \
    (__mmask8) (U)))
12655
/* Macro form: __builtin_ia32_psrlqi256_mask on A with immediate B, W as
   third operand and mask U.  */
#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
    (__v4di)(__m256i) (A), \
    (int) (B), \
    (__v4di)(__m256i) (W), \
    (__mmask8) (U)))
12659
/* Macro form: __builtin_ia32_psrlqi256_mask on A with immediate B, a
   zero vector as third operand and mask U.  */
#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
    (__v4di)(__m256i) (A), \
    (int) (B), \
    (__v4di) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12663
/* Macro form: __builtin_ia32_psrlqi128_mask on A with immediate B, W as
   third operand and mask U.  */
#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
    (__v2di)(__m128i) (A), \
    (int) (B), \
    (__v2di)(__m128i) (W), \
    (__mmask8) (U)))
12667
/* Macro form: __builtin_ia32_psrlqi128_mask on A with immediate B, a
   zero vector as third operand and mask U.  */
#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
    (__v2di)(__m128i) (A), \
    (int) (B), \
    (__v2di) _mm_setzero_si128 (), \
    (__mmask8) (U)))
12671
/* Macro form: __builtin_ia32_pslldi256_mask on X with immediate C, W as
   third operand and mask U.  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v8si)(__m256i) (W), \
    (__mmask8) (U)))
12676
/* Macro form: __builtin_ia32_pslldi256_mask on X with immediate C, a
   zero vector as third operand and mask U.  */
#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
    (__v8si)(__m256i) (X), \
    (int) (C), \
    (__v8si)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12681
/* Macro form: __builtin_ia32_psllqi256_mask on X with immediate C, W as
   third operand and mask U.  */
#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
    (__v4di)(__m256i) (X), \
    (int) (C), \
    (__v4di)(__m256i) (W), \
    (__mmask8) (U)))
12686
/* Macro form: __builtin_ia32_psllqi256_mask on X with immediate C, a
   zero vector as third operand and mask U.  */
#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
    (__v4di)(__m256i) (X), \
    (int) (C), \
    (__v4di)(__m256i) _mm256_setzero_si256 (), \
    (__mmask8) (U)))
12691
/* Macro form: __builtin_ia32_pslldi128_mask on X with immediate C, W as
   third operand and mask U.  */
#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
    (__v4si)(__m128i) (X), \
    (int) (C), \
    (__v4si)(__m128i) (W), \
    (__mmask8) (U)))
12696
/* Macro form: __builtin_ia32_pslldi128_mask on X with immediate C, a
   zero vector as third operand and mask U.  */
#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
    (__v4si)(__m128i) (X), \
    (int) (C), \
    (__v4si)(__m128i) _mm_setzero_si128 (), \
    (__mmask8) (U)))
12701
/* Macro form: __builtin_ia32_psllqi128_mask on X with immediate C, W as
   third operand and mask U.  */
#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
    (__v2di)(__m128i) (X), \
    (int) (C), \
    (__v2di)(__m128i) (W), \
    (__mmask8) (U)))
12706
/* Macro form: __builtin_ia32_psllqi128_mask on X with immediate C, the
   result of _mm_setzero_di () as third operand and mask U.  */
#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
    (__v2di)(__m128i) (X), \
    (int) (C), \
    (__v2di)(__m128i) _mm_setzero_di (), \
    (__mmask8) (U)))
12711
/* Macro form: __builtin_ia32_pternlogq256_mask on A, B, C with immediate
   I and mask -1.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
    (__v4di)(__m256i) (A), \
    (__v4di)(__m256i) (B), \
    (__v4di)(__m256i) (C), \
    (int) (I), \
    (__mmask8) -1))
12715
/* Macro form: __builtin_ia32_pternlogq256_mask on A, B, C with immediate
   I and mask U.  */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
    (__v4di)(__m256i) (A), \
    (__v4di)(__m256i) (B), \
    (__v4di)(__m256i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12719
/* Macro form: __builtin_ia32_pternlogq256_maskz on A, B, C with
   immediate I and mask U.  */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
    (__v4di)(__m256i) (A), \
    (__v4di)(__m256i) (B), \
    (__v4di)(__m256i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12723
/* Macro form: __builtin_ia32_pternlogd256_mask on A, B, C with immediate
   I and mask -1.  */
#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
    (__v8si)(__m256i) (A), \
    (__v8si)(__m256i) (B), \
    (__v8si)(__m256i) (C), \
    (int) (I), \
    (__mmask8) -1))
12727
/* Macro form: __builtin_ia32_pternlogd256_mask on A, B, C with immediate
   I and mask U.  */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
    (__v8si)(__m256i) (A), \
    (__v8si)(__m256i) (B), \
    (__v8si)(__m256i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12731
/* Macro form: __builtin_ia32_pternlogd256_maskz on A, B, C with
   immediate I and mask U.  */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
    (__v8si)(__m256i) (A), \
    (__v8si)(__m256i) (B), \
    (__v8si)(__m256i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12735
/* Macro form: __builtin_ia32_pternlogq128_mask on A, B, C with immediate
   I and mask -1.  */
#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
    (__v2di)(__m128i) (A), \
    (__v2di)(__m128i) (B), \
    (__v2di)(__m128i) (C), \
    (int) (I), \
    (__mmask8) -1))
12739
/* Macro form: __builtin_ia32_pternlogq128_mask on A, B, C with immediate
   I and mask U.  */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
    (__v2di)(__m128i) (A), \
    (__v2di)(__m128i) (B), \
    (__v2di)(__m128i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12743
/* Macro form: __builtin_ia32_pternlogq128_maskz on A, B, C with
   immediate I and mask U.  */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
    (__v2di)(__m128i) (A), \
    (__v2di)(__m128i) (B), \
    (__v2di)(__m128i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12747
/* Macro form: __builtin_ia32_pternlogd128_mask on A, B, C with immediate
   I and mask -1.  */
#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
    (__v4si)(__m128i) (A), \
    (__v4si)(__m128i) (B), \
    (__v4si)(__m128i) (C), \
    (int) (I), \
    (__mmask8) -1))
12751
/* Macro form: __builtin_ia32_pternlogd128_mask on A, B, C with immediate
   I and mask U.  */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
    (__v4si)(__m128i) (A), \
    (__v4si)(__m128i) (B), \
    (__v4si)(__m128i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12755
/* Macro form: __builtin_ia32_pternlogd128_maskz on A, B, C with
   immediate I and mask U.  */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
    (__v4si)(__m128i) (A), \
    (__v4si)(__m128i) (B), \
    (__v4si)(__m128i) (C), \
    (int) (I), \
    (__mmask8) (U)))
12759
/* Macro form: __builtin_ia32_rndscaleps_256_mask on A with immediate B,
   a zero vector as third operand and mask -1.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
    (__v8sf)(__m256) (A), \
    (int) (B), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) -1))
12763
/* Macro form: __builtin_ia32_rndscaleps_256_mask on A with immediate B,
   W as third operand and mask U.  */
#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
    (__v8sf)(__m256) (A), \
    (int) (B), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))
12767
/* Macro form: __builtin_ia32_rndscaleps_256_mask on A with immediate B,
   a zero vector as third operand and mask U.  */
#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
    (__v8sf)(__m256) (A), \
    (int) (B), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))
12771
/* Macro form: __builtin_ia32_rndscalepd_256_mask on A with immediate B,
   a zero vector as third operand and mask -1.  */
#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
    (__v4df)(__m256d) (A), \
    (int) (B), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) -1))
12775
/* Macro form: __builtin_ia32_rndscalepd_256_mask on A with immediate B,
   W as third operand and mask U.  */
#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
    (__v4df)(__m256d) (A), \
    (int) (B), \
    (__v4df)(__m256d) (W), \
    (__mmask8) (U)))
12779
/* Macro form: __builtin_ia32_rndscalepd_256_mask on A with immediate B,
   a zero vector as third operand and mask U.  */
#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
    (__v4df)(__m256d) (A), \
    (int) (B), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))
12783
/* Macro form: __builtin_ia32_rndscaleps_128_mask on A with immediate B,
   a zero vector as third operand and mask -1.  */
#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
    (__v4sf)(__m128) (A), \
    (int) (B), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) -1))
12787
/* Macro form: __builtin_ia32_rndscaleps_128_mask on A with immediate B,
   W as third operand and mask U.  */
#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
    (__v4sf)(__m128) (A), \
    (int) (B), \
    (__v4sf)(__m128) (W), \
    (__mmask8) (U)))
12791
/* Macro form: __builtin_ia32_rndscaleps_128_mask on A with immediate B,
   a zero vector as third operand and mask U.  */
#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
    (__v4sf)(__m128) (A), \
    (int) (B), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) (U)))
12795
/* Macro form: __builtin_ia32_rndscalepd_128_mask on A with immediate B,
   a zero vector as third operand and mask -1.  */
#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
    (__v2df)(__m128d) (A), \
    (int) (B), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) -1))
12799
/* Macro form: __builtin_ia32_rndscalepd_128_mask on A with immediate B,
   W as third operand and mask U.  */
#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
    (__v2df)(__m128d) (A), \
    (int) (B), \
    (__v2df)(__m128d) (W), \
    (__mmask8) (U)))
12803
/* Macro form: __builtin_ia32_rndscalepd_128_mask on A with immediate B,
   a zero vector as third operand and mask U.  */
#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
    (__v2df)(__m128d) (A), \
    (int) (B), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) (U)))
12807
/* Macro form: __builtin_ia32_getmantps256_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: zero vector, mask -1.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) -1))
12813
/* Macro form: __builtin_ia32_getmantps256_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: W, mask U.  */
#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8sf)(__m256) (W), \
    (__mmask8) (U)))
12819
/* Macro form: __builtin_ia32_getmantps256_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: zero vector, mask U.  */
#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
    (__v8sf)(__m256) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8sf)(__m256) _mm256_setzero_ps (), \
    (__mmask8) (U)))
12825
/* Macro form: __builtin_ia32_getmantps128_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: zero vector, mask -1.  */
#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
    (__v4sf)(__m128) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) -1))
12831
/* Macro form: __builtin_ia32_getmantps128_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: W, mask U.  */
#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
    (__v4sf)(__m128) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4sf)(__m128) (W), \
    (__mmask8) (U)))
12837
/* Macro form: __builtin_ia32_getmantps128_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: zero vector, mask U.  */
#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
    (__v4sf)(__m128) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4sf)(__m128) _mm_setzero_ps (), \
    (__mmask8) (U)))
12843
/* Macro form: __builtin_ia32_getmantpd256_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: zero vector, mask -1.  */
#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) -1))
12849
/* Macro form: __builtin_ia32_getmantpd256_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: W, mask U.  */
#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4df)(__m256d) (W), \
    (__mmask8) (U)))
12855
/* Macro form: __builtin_ia32_getmantpd256_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: zero vector, mask U.  */
#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
    (__v4df)(__m256d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v4df)(__m256d) _mm256_setzero_pd (), \
    (__mmask8) (U)))
12861
/* Macro form: __builtin_ia32_getmantpd128_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: zero vector, mask -1.  */
#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) -1))
12867
/* Macro form: __builtin_ia32_getmantpd128_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: W, mask U.  */
#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v2df)(__m128d) (W), \
    (__mmask8) (U)))
12873
/* Macro form: __builtin_ia32_getmantpd128_mask on X; the immediate is
   built as ((C) << 2) | (B).  Trailing operands: zero vector, mask U.  */
#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
    (__v2df)(__m128d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v2df)(__m128d) _mm_setzero_pd (), \
    (__mmask8) (U)))
12879
/* Macro form of the gather built on __builtin_ia32_gather3siv8sf.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ( \
    (__v8sf)(__m256) (V1OLD), \
    (float const *) (ADDR), \
    (__v8si)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12885
/* Macro form of the gather built on __builtin_ia32_gather3siv4sf.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ( \
    (__v4sf)(__m128) (V1OLD), \
    (float const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12891
/* Macro form of the gather built on __builtin_ia32_gather3siv4df.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ( \
    (__v4df)(__m256d) (V1OLD), \
    (double const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12897
/* Macro form of the gather built on __builtin_ia32_gather3siv2df.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ( \
    (__v2df)(__m128d) (V1OLD), \
    (double const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12903
/* Macro form of the gather built on __builtin_ia32_gather3div8sf; the
   result and V1OLD are __m128 while INDEX is __m256i.  Each argument is
   parenthesized before it is cast so that a compound argument (e.g.
   "base + off") is converted as a whole, and the expansion is
   parenthesized so it can be used as a primary expression.  */
#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ( \
    (__v4sf)(__m128) (V1OLD), \
    (float const *) (ADDR), \
    (__v4di)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12909
/* Macro form of the gather built on __builtin_ia32_gather3div4sf.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ( \
    (__v4sf)(__m128) (V1OLD), \
    (float const *) (ADDR), \
    (__v2di)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12915
/* Macro form of the gather built on __builtin_ia32_gather3div4df.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ( \
    (__v4df)(__m256d) (V1OLD), \
    (double const *) (ADDR), \
    (__v4di)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12921
/* Macro form of the gather built on __builtin_ia32_gather3div2df.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ( \
    (__v2df)(__m128d) (V1OLD), \
    (double const *) (ADDR), \
    (__v2di)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12927
/* Macro form of the gather built on __builtin_ia32_gather3siv8si.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ( \
    (__v8si)(__m256i) (V1OLD), \
    (int const *) (ADDR), \
    (__v8si)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12933
/* Macro form of the gather built on __builtin_ia32_gather3siv4si.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ( \
    (__v4si)(__m128i) (V1OLD), \
    (int const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12939
/* Macro form of the gather built on __builtin_ia32_gather3siv4di.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ( \
    (__v4di)(__m256i) (V1OLD), \
    (long long const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12945
/* Macro form of the gather built on __builtin_ia32_gather3siv2di.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ( \
    (__v2di)(__m128i) (V1OLD), \
    (long long const *) (ADDR), \
    (__v4si)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12951
/* Macro form of the gather built on __builtin_ia32_gather3div8si; the
   result and V1OLD are __m128i while INDEX is __m256i.  Each argument is
   parenthesized before it is cast so that a compound argument (e.g.
   "base + off") is converted as a whole, and the expansion is
   parenthesized so it can be used as a primary expression.  */
#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ( \
    (__v4si)(__m128i) (V1OLD), \
    (int const *) (ADDR), \
    (__v4di)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12957
/* Macro form of the gather built on __builtin_ia32_gather3div4si.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ( \
    (__v4si)(__m128i) (V1OLD), \
    (int const *) (ADDR), \
    (__v2di)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12963
/* Macro form of the gather built on __builtin_ia32_gather3div4di.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ( \
    (__v4di)(__m256i) (V1OLD), \
    (long long const *) (ADDR), \
    (__v4di)(__m256i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12969
/* Macro form of the gather built on __builtin_ia32_gather3div2di.  Each
   argument is parenthesized before it is cast so that a compound
   argument (e.g. "base + off") is converted as a whole, and the
   expansion is parenthesized so it can be used as a primary expression.  */
#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ( \
    (__v2di)(__m128i) (V1OLD), \
    (long long const *) (ADDR), \
    (__v2di)(__m128i) (INDEX), \
    (__mmask8) (MASK), \
    (int) (SCALE)))
12975
/* Macro form of the scatter built on __builtin_ia32_scattersiv8sf with a
   constant 0xFF mask.  Each argument is parenthesized before it is cast
   so that a compound argument (e.g. "base + off") is converted as a
   whole.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ( \
    (float *) (ADDR), \
    (__mmask8) 0xFF, \
    (__v8si)(__m256i) (INDEX), \
    (__v8sf)(__m256) (V1), \
    (int) (SCALE))
12980
/* Macro form of the scatter built on __builtin_ia32_scattersiv8sf with
   mask MASK.  Each argument is parenthesized before it is cast so that a
   compound argument (e.g. "base + off") is converted as a whole.  */
#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ( \
    (float *) (ADDR), \
    (__mmask8) (MASK), \
    (__v8si)(__m256i) (INDEX), \
    (__v8sf)(__m256) (V1), \
    (int) (SCALE))
12985
/* Macro form of the scatter built on __builtin_ia32_scattersiv4sf with a
   constant 0xFF mask.  Each argument is parenthesized before it is cast
   so that a compound argument (e.g. "base + off") is converted as a
   whole.  */
#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ( \
    (float *) (ADDR), \
    (__mmask8) 0xFF, \
    (__v4si)(__m128i) (INDEX), \
    (__v4sf)(__m128) (V1), \
    (int) (SCALE))
12990
/* Macro form of the scatter built on __builtin_ia32_scattersiv4sf with
   mask MASK.  Each argument is parenthesized before it is cast so that a
   compound argument (e.g. "base + off") is converted as a whole.  */
#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ( \
    (float *) (ADDR), \
    (__mmask8) (MASK), \
    (__v4si)(__m128i) (INDEX), \
    (__v4sf)(__m128) (V1), \
    (int) (SCALE))
12995
/* Scatter of double-precision elements with 32-bit indices.  Each macro
   forwards to __builtin_ia32_scattersiv4df (256-bit data) or
   __builtin_ia32_scattersiv2df (128-bit data); INDEX is a 128-bit vector
   in both cases and the unmasked forms pass the constant mask 0xFF.
   All macro arguments are parenthesized so that compound expressions are
   cast as a whole.  */
#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((double *) (ADDR), (__mmask8) 0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((double *) (ADDR), (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((double *) (ADDR), (__mmask8) 0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v2df)(__m128d) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((double *) (ADDR), (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v2df)(__m128d) (V1), (int) (SCALE))
13015
/* Scatter of single-precision elements with 64-bit indices.  Each macro
   forwards to __builtin_ia32_scatterdiv8sf (256-bit index vector) or
   __builtin_ia32_scatterdiv4sf (128-bit index vector); V1 is a 128-bit
   vector in both cases and the unmasked forms pass the constant mask
   0xFF.  All macro arguments are parenthesized so that compound
   expressions are cast as a whole.  */
#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((float *) (ADDR), (__mmask8) 0xFF, \
      (__v4di)(__m256i) (INDEX), \
      (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((float *) (ADDR), (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), \
      (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((float *) (ADDR), (__mmask8) 0xFF, \
      (__v2di)(__m128i) (INDEX), \
      (__v4sf)(__m128) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((float *) (ADDR), (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), \
      (__v4sf)(__m128) (V1), (int) (SCALE))
13035
/* Scatter of double-precision elements with 64-bit indices.  Each macro
   forwards to __builtin_ia32_scatterdiv4df (256-bit) or
   __builtin_ia32_scatterdiv2df (128-bit); the unmasked forms pass the
   constant mask 0xFF.  All macro arguments are parenthesized so that
   compound expressions are cast as a whole.  */
#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((double *) (ADDR), (__mmask8) 0xFF, \
      (__v4di)(__m256i) (INDEX), \
      (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((double *) (ADDR), (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), \
      (__v4df)(__m256d) (V1), (int) (SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((double *) (ADDR), (__mmask8) 0xFF, \
      (__v2di)(__m128i) (INDEX), \
      (__v2df)(__m128d) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((double *) (ADDR), (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), \
      (__v2df)(__m128d) (V1), (int) (SCALE))
13055
/* Scatter of 32-bit integer elements with 32-bit indices.  Each macro
   forwards to __builtin_ia32_scattersiv8si (256-bit) or
   __builtin_ia32_scattersiv4si (128-bit); the unmasked forms pass the
   constant mask 0xFF.  All macro arguments are parenthesized so that
   compound expressions are cast as a whole.  */
#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((int *) (ADDR), (__mmask8) 0xFF, \
      (__v8si)(__m256i) (INDEX), \
      (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((int *) (ADDR), (__mmask8) (MASK), \
      (__v8si)(__m256i) (INDEX), \
      (__v8si)(__m256i) (V1), (int) (SCALE))

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((int *) (ADDR), (__mmask8) 0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((int *) (ADDR), (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v4si)(__m128i) (V1), (int) (SCALE))
13075
/* Scatter of 64-bit integer elements with 32-bit indices.  Each macro
   forwards to __builtin_ia32_scattersiv4di (256-bit data) or
   __builtin_ia32_scattersiv2di (128-bit data); INDEX is a 128-bit vector
   in both cases and the unmasked forms pass the constant mask 0xFF.
   All macro arguments are parenthesized so that compound expressions are
   cast as a whole.  */
#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((long long *) (ADDR), (__mmask8) 0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((long long *) (ADDR), (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((long long *) (ADDR), (__mmask8) 0xFF, \
      (__v4si)(__m128i) (INDEX), \
      (__v2di)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((long long *) (ADDR), (__mmask8) (MASK), \
      (__v4si)(__m128i) (INDEX), \
      (__v2di)(__m128i) (V1), (int) (SCALE))
13095
/* Scatter of 32-bit integer elements with 64-bit indices.  Each macro
   forwards to __builtin_ia32_scatterdiv8si (256-bit index vector) or
   __builtin_ia32_scatterdiv4si (128-bit index vector); V1 is a 128-bit
   vector in both cases and the unmasked forms pass the constant mask
   0xFF.  All macro arguments are parenthesized so that compound
   expressions are cast as a whole.  */
#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((int *) (ADDR), (__mmask8) 0xFF, \
      (__v4di)(__m256i) (INDEX), \
      (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((int *) (ADDR), (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), \
      (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((int *) (ADDR), (__mmask8) 0xFF, \
      (__v2di)(__m128i) (INDEX), \
      (__v4si)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((int *) (ADDR), (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), \
      (__v4si)(__m128i) (V1), (int) (SCALE))
13115
/* Scatter of 64-bit integer elements with 64-bit indices.  Each macro
   forwards to __builtin_ia32_scatterdiv4di (256-bit) or
   __builtin_ia32_scatterdiv2di (128-bit); the unmasked forms pass the
   constant mask 0xFF.  All macro arguments are parenthesized so that
   compound expressions are cast as a whole.  */
#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((long long *) (ADDR), (__mmask8) 0xFF, \
      (__v4di)(__m256i) (INDEX), \
      (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((long long *) (ADDR), (__mmask8) (MASK), \
      (__v4di)(__m256i) (INDEX), \
      (__v4di)(__m256i) (V1), (int) (SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((long long *) (ADDR), (__mmask8) 0xFF, \
      (__v2di)(__m128i) (INDEX), \
      (__v2di)(__m128i) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((long long *) (ADDR), (__mmask8) (MASK), \
      (__v2di)(__m128i) (INDEX), \
      (__v2di)(__m128i) (V1), (int) (SCALE))
13135
/* Masked 32-bit shuffle macros.  Each forwards the source vector A and
   the immediate IMM to __builtin_ia32_pshufd256_mask or
   __builtin_ia32_pshufd128_mask.  The mask forms pass W as the third
   builtin argument; the maskz forms pass an all-zero vector there.  */
#define _mm256_mask_shuffle_epi32(W, U, A, IMM) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(IMM), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_shuffle_epi32(U, A, IMM) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(IMM), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_shuffle_epi32(W, U, A, IMM) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(IMM), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_shuffle_epi32(U, A, IMM) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(IMM), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13155
/* rol_epi64 macro family.  Each forwards A and the immediate count N to
   __builtin_ia32_prolq256_mask or __builtin_ia32_prolq128_mask.
   - plain form:  zero vector as third argument, all-ones mask;
   - mask form:   W as third argument, mask U;
   - maskz form:  zero vector as third argument, mask U.  */
#define _mm256_rol_epi64(A, N) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(N), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8) -1))

#define _mm256_mask_rol_epi64(W, U, A, N) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(N), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi64(U, A, N) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(N), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi64(A, N) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(N), \
      (__v2di)(__m128i)_mm_setzero_di (), \
      (__mmask8) -1))

#define _mm_mask_rol_epi64(W, U, A, N) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(N), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi64(U, A, N) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(N), \
      (__v2di)(__m128i)_mm_setzero_di (), \
      (__mmask8)(U)))
13185
/* ror_epi64 macro family.  Each forwards A and the immediate count N to
   __builtin_ia32_prorq256_mask or __builtin_ia32_prorq128_mask.
   - plain form:  zero vector as third argument, all-ones mask;
   - mask form:   W as third argument, mask U;
   - maskz form:  zero vector as third argument, mask U.  */
#define _mm256_ror_epi64(A, N) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(N), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8) -1))

#define _mm256_mask_ror_epi64(W, U, A, N) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(N), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi64(U, A, N) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(N), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi64(A, N) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(N), \
      (__v2di)(__m128i)_mm_setzero_di (), \
      (__mmask8) -1))

#define _mm_mask_ror_epi64(W, U, A, N) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(N), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi64(U, A, N) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(N), \
      (__v2di)(__m128i)_mm_setzero_di (), \
      (__mmask8)(U)))
13215
/* rol_epi32 macro family.  Each forwards A and the immediate count N to
   __builtin_ia32_prold256_mask or __builtin_ia32_prold128_mask.
   - plain form:  zero vector as third argument, all-ones mask;
   - mask form:   W as third argument, mask U;
   - maskz form:  zero vector as third argument, mask U.  */
#define _mm256_rol_epi32(A, N) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(N), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8) -1))

#define _mm256_mask_rol_epi32(W, U, A, N) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(N), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_rol_epi32(U, A, N) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(N), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_rol_epi32(A, N) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(N), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8) -1))

#define _mm_mask_rol_epi32(W, U, A, N) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(N), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_rol_epi32(U, A, N) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(N), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13245
/* ror_epi32 macro family.  Each forwards A and the immediate count N to
   __builtin_ia32_prord256_mask or __builtin_ia32_prord128_mask.
   - plain form:  zero vector as third argument, all-ones mask;
   - mask form:   W as third argument, mask U;
   - maskz form:  zero vector as third argument, mask U.  */
#define _mm256_ror_epi32(A, N) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(N), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8) -1))

#define _mm256_mask_ror_epi32(W, U, A, N) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(N), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_ror_epi32(U, A, N) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(N), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_ror_epi32(A, N) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(N), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8) -1))

#define _mm_mask_ror_epi32(W, U, A, N) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(N), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_ror_epi32(U, A, N) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(N), \
      (__v4si)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13275
/* 256-bit alignr on 32-bit elements; all three forms forward X, Y and the
   immediate C to __builtin_ia32_alignd256_mask.

   The plain form uses an all-ones mask, so no element is taken from the
   fourth (pass-through) builtin argument.  A zero vector is passed there
   instead of a second copy of X, so that X is expanded, and therefore
   evaluated, only once.  The mask form passes W and U; the maskz form
   passes a zero vector and U.  */
#define _mm256_alignr_epi32(X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(C), \
      (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), (int)(C), \
      (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
13288
/* 256-bit alignr on 64-bit elements; all three forms forward X, Y and the
   immediate C to __builtin_ia32_alignq256_mask.

   The plain form uses an all-ones mask, so no element is taken from the
   fourth (pass-through) builtin argument.  A zero vector is passed there
   instead of a second copy of X, so that X is expanded, and therefore
   evaluated, only once.  The mask form passes W and U; the maskz form
   passes a zero vector and U.  */
#define _mm256_alignr_epi64(X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)-1))

#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(C), \
      (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
  ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), (int)(C), \
      (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
13301
/* 128-bit alignr on 32-bit elements; all three forms forward X, Y and the
   immediate C to __builtin_ia32_alignd128_mask.

   The plain form uses an all-ones mask, so no element is taken from the
   fourth (pass-through) builtin argument.  A zero vector is passed there
   instead of a second copy of X, so that X is expanded, and therefore
   evaluated, only once.  The mask form passes W and U; the maskz form
   passes a zero vector and U.  */
#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))

#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(C), \
      (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), (int)(C), \
      (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
13314
/* Unmasked 128-bit alignr on 64-bit elements: forwards X, Y and the
   immediate C to __builtin_ia32_alignq128_mask with an all-ones mask.
   With that mask no element is taken from the fourth (pass-through)
   builtin argument, so a zero vector is passed there instead of a second
   copy of X; X is thus expanded, and evaluated, only once.  */
#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(C), \
      (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
13318
/* Merge-masked 128-bit alignr on 64-bit elements: forwards X, Y and the
   immediate C to __builtin_ia32_alignq128_mask together with the caller's
   pass-through vector W and write mask U.

   The previous expansion ignored W and U and passed (X) with an all-ones
   mask, which made this macro behave like the unmasked _mm_alignr_epi64.
   The argument layout now matches _mm_mask_alignr_epi32 and the 256-bit
   mask_alignr macros.  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), (int)(C), \
      (__v2di)(__m128i)(W), (__mmask8)(U)))
13322
/* Zero-masked 128-bit alignr on 64-bit elements: forwards A, B and the
   immediate IMM to __builtin_ia32_alignq128_mask, with an all-zero vector
   as the fourth argument and U as the mask.  */
#define _mm_maskz_alignr_epi64(U, A, B, IMM) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(IMM), \
      (__v2di)(__m128i)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13327
/* Masked float-to-half conversion macros.  Each forwards A and the
   immediate I to __builtin_ia32_vcvtps2ph_mask (128-bit source) or
   __builtin_ia32_vcvtps2ph256_mask (256-bit source).  The mask forms pass
   W as the third builtin argument; the maskz forms pass
   _mm_setzero_hi ().

   A is parenthesized before it is cast, like every other argument, so
   that a compound expression is converted as a whole.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))

#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
      (__v8hi)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
13343
/* Masked srai_epi32 macros.  Each forwards A and the immediate count N to
   __builtin_ia32_psradi256_mask or __builtin_ia32_psradi128_mask.  The
   mask forms pass W as the third builtin argument; the maskz forms pass
   an all-zero vector there.  */
#define _mm256_mask_srai_epi32(W, U, A, N) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(N), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srai_epi32(U, A, N) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
      (__v8si)(__m256i)(A), \
      (int)(N), \
      (__v8si)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_mask_srai_epi32(W, U, A, N) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(N), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srai_epi32(U, A, N) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
      (__v4si)(__m128i)(A), \
      (int)(N), \
      (__v4si)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13359
/* srai_epi64 macro family.  Each forwards A and the immediate count N to
   __builtin_ia32_psraqi256_mask or __builtin_ia32_psraqi128_mask.
   - plain form:  zero vector as third argument, all-ones mask;
   - mask form:   W as third argument, mask U;
   - maskz form:  zero vector as third argument, mask U.  */
#define _mm256_srai_epi64(A, N) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(N), \
      (__v4di)_mm256_setzero_si256 (), \
      (__mmask8) -1))

#define _mm256_mask_srai_epi64(W, U, A, N) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(N), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_srai_epi64(U, A, N) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
      (__v4di)(__m256i)(A), \
      (int)(N), \
      (__v4di)_mm256_setzero_si256 (), \
      (__mmask8)(U)))

#define _mm_srai_epi64(A, N) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(N), \
      (__v2di)_mm_setzero_si128 (), \
      (__mmask8) -1))

#define _mm_mask_srai_epi64(W, U, A, N) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(N), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_srai_epi64(U, A, N) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
      (__v2di)(__m128i)(A), \
      (int)(N), \
      (__v2di)_mm_setzero_si128 (), \
      (__mmask8)(U)))
13383
/* Masked permutex_pd macros.  Both forward A and the immediate IMM to
   __builtin_ia32_permdf256_mask; the mask form passes W as the third
   builtin argument, the maskz form passes an all-zero vector.  */
#define _mm256_mask_permutex_pd(W, U, A, IMM) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(IMM), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_permutex_pd(U, A, IMM) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(IMM), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))
13391
/* 256-bit masked permute macros.  The pd forms forward A and the
   immediate IMM to __builtin_ia32_vpermilpd256_mask, the ps forms to
   __builtin_ia32_vpermilps256_mask.  The mask forms pass W as the third
   builtin argument; the maskz forms pass an all-zero vector there.  */
#define _mm256_mask_permute_pd(W, U, A, IMM) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(IMM), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_permute_pd(U, A, IMM) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
      (__v4df)(__m256d)(A), \
      (int)(IMM), \
      (__v4df)(__m256d)_mm256_setzero_pd (), \
      (__mmask8)(U)))

#define _mm256_mask_permute_ps(W, U, A, IMM) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(IMM), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_permute_ps(U, A, IMM) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
      (__v8sf)(__m256)(A), \
      (int)(IMM), \
      (__v8sf)(__m256)_mm256_setzero_ps (), \
      (__mmask8)(U)))
13410
/* 128-bit masked permute macros.  The pd forms forward A and the
   immediate IMM to __builtin_ia32_vpermilpd_mask, the ps forms to
   __builtin_ia32_vpermilps_mask.  The mask forms pass W as the third
   builtin argument; the maskz forms pass an all-zero vector there.  */
#define _mm_mask_permute_pd(W, U, A, IMM) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(IMM), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)))

#define _mm_maskz_permute_pd(U, A, IMM) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
      (__v2df)(__m128d)(A), \
      (int)(IMM), \
      (__v2df)(__m128d)_mm_setzero_pd (), \
      (__mmask8)(U)))

#define _mm_mask_permute_ps(W, U, A, IMM) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(IMM), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_permute_ps(U, A, IMM) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
      (__v4sf)(__m128)(A), \
      (int)(IMM), \
      (__v4sf)(__m128)_mm_setzero_ps (), \
      (__mmask8)(U)))
13428
/* 256-bit mask_blend macros.  Each casts A and W to the element vector
   type and forwards them, followed by the mask U, to the matching
   __builtin_ia32_blendm*_256_mask builtin.  */
#define _mm256_mask_blend_pd(U, A, W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
      (__v4df) (A), (__v4df) (W), (__mmask8) (U)))

#define _mm256_mask_blend_ps(U, A, W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ( \
      (__v8sf) (A), (__v8sf) (W), (__mmask8) (U)))

#define _mm256_mask_blend_epi64(U, A, W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ( \
      (__v4di) (A), (__v4di) (W), (__mmask8) (U)))

#define _mm256_mask_blend_epi32(U, A, W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ( \
      (__v8si) (A), (__v8si) (W), (__mmask8) (U)))
13448
/* 128-bit mask_blend macros.  Each casts A and W to the element vector
   type and forwards them, followed by the mask U, to the matching
   __builtin_ia32_blendm*_128_mask builtin.  */
#define _mm_mask_blend_pd(U, A, W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
      (__v2df) (A), (__v2df) (W), (__mmask8) (U)))

#define _mm_mask_blend_ps(U, A, W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ( \
      (__v4sf) (A), (__v4sf) (W), (__mmask8) (U)))

#define _mm_mask_blend_epi64(U, A, W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ( \
      (__v2di) (A), (__v2di) (W), (__mmask8) (U)))

#define _mm_mask_blend_epi32(U, A, W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ( \
      (__v4si) (A), (__v4si) (W), (__mmask8) (U)))
13468
/* 256-bit unmasked compare-into-mask macros.  Each forwards X, Y and the
   predicate PRED to the matching compare builtin with an all-ones mask
   and casts the result to __mmask8.  */
#define _mm256_cmp_epu32_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm256_cmp_epi64_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm256_cmp_epi32_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm256_cmp_epu64_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm256_cmp_pd_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
      (__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm256_cmp_ps_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
      (__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), \
      (int)(PRED), \
      (__mmask8) -1))
13498
/* 256-bit masked compare-into-mask macros.  Each forwards X, Y, the
   predicate PRED and the caller's mask M to the matching compare builtin
   and casts the result to __mmask8.  */
#define _mm256_mask_cmp_epi64_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epi32_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epu64_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
      (__v4di)(__m256i)(X), \
      (__v4di)(__m256i)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_epu32_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
      (__v8si)(__m256i)(X), \
      (__v8si)(__m256i)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_pd_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
      (__v4df)(__m256d)(X), \
      (__v4df)(__m256d)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm256_mask_cmp_ps_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
      (__v8sf)(__m256)(X), \
      (__v8sf)(__m256)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))
13528
/* 128-bit unmasked compare-into-mask macros.  Each forwards X, Y and the
   predicate PRED to the matching compare builtin with an all-ones mask
   and casts the result to __mmask8.  */
#define _mm_cmp_epi64_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm_cmp_epi32_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm_cmp_epu64_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm_cmp_epu32_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm_cmp_pd_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(PRED), \
      (__mmask8) -1))

#define _mm_cmp_ps_mask(X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(PRED), \
      (__mmask8) -1))
13558
/* 128-bit masked compare-into-mask macros.  Each forwards X, Y, the
   predicate PRED and the caller's mask M to the matching compare builtin
   and casts the result to __mmask8.  */
#define _mm_mask_cmp_epi64_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epi32_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epu64_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
      (__v2di)(__m128i)(X), \
      (__v2di)(__m128i)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm_mask_cmp_epu32_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
      (__v4si)(__m128i)(X), \
      (__v4si)(__m128i)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm_mask_cmp_pd_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))

#define _mm_mask_cmp_ps_mask(M, X, Y, PRED) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(PRED), \
      (__mmask8)(M)))
13588
13589 #endif
13590
/* _mm256_permutexvar_ps is spelled in terms of _mm256_permutevar8x32_ps;
   note that the two operands are passed in swapped order.  */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps ((B), (A))
13592
13593 #ifdef __DISABLE_AVX512VL__
13594 #undef __DISABLE_AVX512VL__
13595 #pragma GCC pop_options
13596 #endif /* __DISABLE_AVX512VL__ */
13597
13598 #endif /* _AVX512VLINTRIN_H_INCLUDED */