/* Copyright (C) 2013-2017 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vbmi2vlintrin.h> directly; include <immintrin.h> instead."
#endif /* _IMMINTRIN_H_INCLUDED */

#ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
#define _AVX512VBMI2VLINTRIN_H_INCLUDED
31 #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
32 #pragma GCC push_options
33 #pragma GCC target("avx512vbmi2,avx512vl")
34 #define __DISABLE_AVX512VBMI2VL__
35 #endif /* __AVX512VBMIVL__ */
37 extern __inline __m128i
38 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
39 _mm_mask_compress_epi8 (__m128i __A
, __mmask16 __B
, __m128i __C
)
41 return (__m128i
) __builtin_ia32_compressqi128_mask ((__v16qi
)__C
,
42 (__v16qi
)__A
, (__mmask16
)__B
);
45 extern __inline __m128i
46 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
47 _mm_maskz_compress_epi8 (__mmask16 __A
, __m128i __B
)
49 return (__m128i
) __builtin_ia32_compressqi128_mask ((__v16qi
) __B
,
50 (__v16qi
) _mm_setzero_si128 (), (__mmask16
) __A
);
55 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
56 _mm256_mask_compressstoreu_epi16 (void * __A
, __mmask16 __B
, __m256i __C
)
58 __builtin_ia32_compressstoreuhi256_mask ((__v16hi
*) __A
, (__v16hi
) __C
,
62 extern __inline __m128i
63 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
64 _mm_mask_compress_epi16 (__m128i __A
, __mmask8 __B
, __m128i __C
)
66 return (__m128i
) __builtin_ia32_compresshi128_mask ((__v8hi
)__C
, (__v8hi
)__A
,
70 extern __inline __m128i
71 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
72 _mm_maskz_compress_epi16 (__mmask8 __A
, __m128i __B
)
74 return (__m128i
) __builtin_ia32_compresshi128_mask ((__v8hi
) __B
,
75 (__v8hi
) _mm_setzero_si128 (), (__mmask8
) __A
);
78 extern __inline __m256i
79 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
80 _mm256_mask_compress_epi16 (__m256i __A
, __mmask16 __B
, __m256i __C
)
82 return (__m256i
) __builtin_ia32_compresshi256_mask ((__v16hi
)__C
,
83 (__v16hi
)__A
, (__mmask16
)__B
);
86 extern __inline __m256i
87 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
88 _mm256_maskz_compress_epi16 (__mmask16 __A
, __m256i __B
)
90 return (__m256i
) __builtin_ia32_compresshi256_mask ((__v16hi
) __B
,
91 (__v16hi
) _mm256_setzero_si256 (), (__mmask16
) __A
);
95 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
96 _mm_mask_compressstoreu_epi8 (void * __A
, __mmask16 __B
, __m128i __C
)
98 __builtin_ia32_compressstoreuqi128_mask ((__v16qi
*) __A
, (__v16qi
) __C
,
103 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
104 _mm_mask_compressstoreu_epi16 (void * __A
, __mmask8 __B
, __m128i __C
)
106 __builtin_ia32_compressstoreuhi128_mask ((__v8hi
*) __A
, (__v8hi
) __C
,
110 extern __inline __m128i
111 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
112 _mm_mask_expand_epi8 (__m128i __A
, __mmask16 __B
, __m128i __C
)
114 return (__m128i
) __builtin_ia32_expandqi128_mask ((__v16qi
) __C
,
119 extern __inline __m128i
120 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
121 _mm_maskz_expand_epi8 (__mmask16 __A
, __m128i __B
)
123 return (__m128i
) __builtin_ia32_expandqi128_maskz ((__v16qi
) __B
,
124 (__v16qi
) _mm_setzero_si128 (), (__mmask16
) __A
);
127 extern __inline __m128i
128 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
129 _mm_mask_expandloadu_epi8 (__m128i __A
, __mmask16 __B
, const void * __C
)
131 return (__m128i
) __builtin_ia32_expandloadqi128_mask ((const __v16qi
*) __C
,
132 (__v16qi
) __A
, (__mmask16
) __B
);
135 extern __inline __m128i
136 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
137 _mm_maskz_expandloadu_epi8 (__mmask16 __A
, const void * __B
)
139 return (__m128i
) __builtin_ia32_expandloadqi128_maskz ((const __v16qi
*) __B
,
140 (__v16qi
) _mm_setzero_si128 (), (__mmask16
) __A
);
143 extern __inline __m128i
144 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
145 _mm_mask_expand_epi16 (__m128i __A
, __mmask8 __B
, __m128i __C
)
147 return (__m128i
) __builtin_ia32_expandhi128_mask ((__v8hi
) __C
,
152 extern __inline __m128i
153 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
154 _mm_maskz_expand_epi16 (__mmask8 __A
, __m128i __B
)
156 return (__m128i
) __builtin_ia32_expandhi128_maskz ((__v8hi
) __B
,
157 (__v8hi
) _mm_setzero_si128 (), (__mmask8
) __A
);
160 extern __inline __m128i
161 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
162 _mm_mask_expandloadu_epi16 (__m128i __A
, __mmask8 __B
, const void * __C
)
164 return (__m128i
) __builtin_ia32_expandloadhi128_mask ((const __v8hi
*) __C
,
165 (__v8hi
) __A
, (__mmask8
) __B
);
168 extern __inline __m128i
169 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
170 _mm_maskz_expandloadu_epi16 (__mmask8 __A
, const void * __B
)
172 return (__m128i
) __builtin_ia32_expandloadhi128_maskz ((const __v8hi
*) __B
,
173 (__v8hi
) _mm_setzero_si128 (), (__mmask8
) __A
);
175 extern __inline __m256i
176 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
177 _mm256_mask_expand_epi16 (__m256i __A
, __mmask16 __B
, __m256i __C
)
179 return (__m256i
) __builtin_ia32_expandhi256_mask ((__v16hi
) __C
,
184 extern __inline __m256i
185 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
186 _mm256_maskz_expand_epi16 (__mmask16 __A
, __m256i __B
)
188 return (__m256i
) __builtin_ia32_expandhi256_maskz ((__v16hi
) __B
,
189 (__v16hi
) _mm256_setzero_si256 (), (__mmask16
) __A
);
192 extern __inline __m256i
193 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
194 _mm256_mask_expandloadu_epi16 (__m256i __A
, __mmask16 __B
, const void * __C
)
196 return (__m256i
) __builtin_ia32_expandloadhi256_mask ((const __v16hi
*) __C
,
197 (__v16hi
) __A
, (__mmask16
) __B
);
200 extern __inline __m256i
201 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
202 _mm256_maskz_expandloadu_epi16 (__mmask16 __A
, const void * __B
)
204 return (__m256i
) __builtin_ia32_expandloadhi256_maskz ((const __v16hi
*) __B
,
205 (__v16hi
) _mm256_setzero_si256 (), (__mmask16
) __A
);
209 extern __inline __m256i
210 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
211 _mm256_shrdi_epi16 (__m256i __A
, __m256i __B
, int __C
)
213 return (__m256i
) __builtin_ia32_vpshrd_v16hi ((__v16hi
)__A
, (__v16hi
) __B
,
217 extern __inline __m256i
218 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
219 _mm256_mask_shrdi_epi16 (__m256i __A
, __mmask16 __B
, __m256i __C
, __m256i __D
,
222 return (__m256i
)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi
)__C
,
223 (__v16hi
) __D
, __E
, (__v16hi
) __A
, (__mmask16
)__B
);
226 extern __inline __m256i
227 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
228 _mm256_maskz_shrdi_epi16 (__mmask16 __A
, __m256i __B
, __m256i __C
, int __D
)
230 return (__m256i
)__builtin_ia32_vpshrd_v16hi_mask ((__v16hi
)__B
,
231 (__v16hi
) __C
, __D
, (__v16hi
) _mm256_setzero_si256 (), (__mmask16
)__A
);
234 extern __inline __m256i
235 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
236 _mm256_mask_shrdi_epi32 (__m256i __A
, __mmask8 __B
, __m256i __C
, __m256i __D
,
239 return (__m256i
)__builtin_ia32_vpshrd_v8si_mask ((__v8si
)__C
, (__v8si
) __D
,
240 __E
, (__v8si
) __A
, (__mmask8
)__B
);
243 extern __inline __m256i
244 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
245 _mm256_maskz_shrdi_epi32 (__mmask8 __A
, __m256i __B
, __m256i __C
, int __D
)
247 return (__m256i
)__builtin_ia32_vpshrd_v8si_mask ((__v8si
)__B
, (__v8si
) __C
,
248 __D
, (__v8si
) _mm256_setzero_si256 (), (__mmask8
)__A
);
251 extern __inline __m256i
252 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
253 _mm256_shrdi_epi32 (__m256i __A
, __m256i __B
, int __C
)
255 return (__m256i
) __builtin_ia32_vpshrd_v8si ((__v8si
)__A
, (__v8si
) __B
, __C
);
258 extern __inline __m256i
259 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
260 _mm256_mask_shrdi_epi64 (__m256i __A
, __mmask8 __B
, __m256i __C
, __m256i __D
,
263 return (__m256i
)__builtin_ia32_vpshrd_v4di_mask ((__v4di
)__C
, (__v4di
) __D
,
264 __E
, (__v4di
) __A
, (__mmask8
)__B
);
267 extern __inline __m256i
268 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
269 _mm256_maskz_shrdi_epi64 (__mmask8 __A
, __m256i __B
, __m256i __C
, int __D
)
271 return (__m256i
)__builtin_ia32_vpshrd_v4di_mask ((__v4di
)__B
, (__v4di
) __C
,
272 __D
, (__v4di
) _mm256_setzero_si256 (), (__mmask8
)__A
);
275 extern __inline __m256i
276 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
277 _mm256_shrdi_epi64 (__m256i __A
, __m256i __B
, int __C
)
279 return (__m256i
) __builtin_ia32_vpshrd_v4di ((__v4di
)__A
, (__v4di
) __B
, __C
);
282 extern __inline __m128i
283 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
284 _mm_mask_shrdi_epi16 (__m128i __A
, __mmask8 __B
, __m128i __C
, __m128i __D
,
287 return (__m128i
)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi
)__C
, (__v8hi
) __D
,
288 __E
, (__v8hi
) __A
, (__mmask8
)__B
);
291 extern __inline __m128i
292 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
293 _mm_maskz_shrdi_epi16 (__mmask8 __A
, __m128i __B
, __m128i __C
, int __D
)
295 return (__m128i
)__builtin_ia32_vpshrd_v8hi_mask ((__v8hi
)__B
, (__v8hi
) __C
,
296 __D
, (__v8hi
) _mm_setzero_si128 (), (__mmask8
)__A
);
299 extern __inline __m128i
300 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
301 _mm_shrdi_epi16 (__m128i __A
, __m128i __B
, int __C
)
303 return (__m128i
) __builtin_ia32_vpshrd_v8hi ((__v8hi
)__A
, (__v8hi
) __B
, __C
);
306 extern __inline __m128i
307 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
308 _mm_mask_shrdi_epi32 (__m128i __A
, __mmask8 __B
, __m128i __C
, __m128i __D
,
311 return (__m128i
)__builtin_ia32_vpshrd_v4si_mask ((__v4si
)__C
, (__v4si
) __D
,
312 __E
, (__v4si
) __A
, (__mmask8
)__B
);
315 extern __inline __m128i
316 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
317 _mm_maskz_shrdi_epi32 (__mmask8 __A
, __m128i __B
, __m128i __C
, int __D
)
319 return (__m128i
)__builtin_ia32_vpshrd_v4si_mask ((__v4si
)__B
, (__v4si
) __C
,
320 __D
, (__v4si
) _mm_setzero_si128 (), (__mmask8
)__A
);
323 extern __inline __m128i
324 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
325 _mm_shrdi_epi32 (__m128i __A
, __m128i __B
, int __C
)
327 return (__m128i
) __builtin_ia32_vpshrd_v4si ((__v4si
)__A
, (__v4si
) __B
, __C
);
330 extern __inline __m128i
331 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
332 _mm_mask_shrdi_epi64 (__m128i __A
, __mmask8 __B
, __m128i __C
, __m128i __D
,
335 return (__m128i
)__builtin_ia32_vpshrd_v2di_mask ((__v2di
)__C
, (__v2di
) __D
,
336 __E
, (__v2di
) __A
, (__mmask8
)__B
);
339 extern __inline __m128i
340 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
341 _mm_maskz_shrdi_epi64 (__mmask8 __A
, __m128i __B
, __m128i __C
, int __D
)
343 return (__m128i
)__builtin_ia32_vpshrd_v2di_mask ((__v2di
)__B
, (__v2di
) __C
,
344 __D
, (__v2di
) _mm_setzero_si128 (), (__mmask8
)__A
);
347 extern __inline __m128i
348 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
349 _mm_shrdi_epi64 (__m128i __A
, __m128i __B
, int __C
)
351 return (__m128i
) __builtin_ia32_vpshrd_v2di ((__v2di
)__A
, (__v2di
) __B
, __C
);
354 extern __inline __m256i
355 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
356 _mm256_shldi_epi16 (__m256i __A
, __m256i __B
, int __C
)
358 return (__m256i
) __builtin_ia32_vpshld_v16hi ((__v16hi
)__A
, (__v16hi
) __B
,
362 extern __inline __m256i
363 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
364 _mm256_mask_shldi_epi16 (__m256i __A
, __mmask16 __B
, __m256i __C
, __m256i __D
,
367 return (__m256i
)__builtin_ia32_vpshld_v16hi_mask ((__v16hi
)__C
,
368 (__v16hi
) __D
, __E
, (__v16hi
) __A
, (__mmask16
)__B
);
371 extern __inline __m256i
372 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
373 _mm256_maskz_shldi_epi16 (__mmask16 __A
, __m256i __B
, __m256i __C
, int __D
)
375 return (__m256i
)__builtin_ia32_vpshld_v16hi_mask ((__v16hi
)__B
,
376 (__v16hi
) __C
, __D
, (__v16hi
) _mm256_setzero_si256 (), (__mmask16
)__A
);
379 extern __inline __m256i
380 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
381 _mm256_mask_shldi_epi32 (__m256i __A
, __mmask8 __B
, __m256i __C
, __m256i __D
,
384 return (__m256i
)__builtin_ia32_vpshld_v8si_mask ((__v8si
)__C
, (__v8si
) __D
,
385 __E
, (__v8si
) __A
, (__mmask8
)__B
);
388 extern __inline __m256i
389 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
390 _mm256_maskz_shldi_epi32 (__mmask8 __A
, __m256i __B
, __m256i __C
, int __D
)
392 return (__m256i
)__builtin_ia32_vpshld_v8si_mask ((__v8si
)__B
, (__v8si
) __C
,
393 __D
, (__v8si
) _mm256_setzero_si256 (), (__mmask8
)__A
);
396 extern __inline __m256i
397 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
398 _mm256_shldi_epi32 (__m256i __A
, __m256i __B
, int __C
)
400 return (__m256i
) __builtin_ia32_vpshld_v8si ((__v8si
)__A
, (__v8si
) __B
, __C
);
403 extern __inline __m256i
404 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
405 _mm256_mask_shldi_epi64 (__m256i __A
, __mmask8 __B
, __m256i __C
, __m256i __D
,
408 return (__m256i
)__builtin_ia32_vpshld_v4di_mask ((__v4di
)__C
, (__v4di
) __D
,
409 __E
, (__v4di
) __A
, (__mmask8
)__B
);
412 extern __inline __m256i
413 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
414 _mm256_maskz_shldi_epi64 (__mmask8 __A
, __m256i __B
, __m256i __C
, int __D
)
416 return (__m256i
)__builtin_ia32_vpshld_v4di_mask ((__v4di
)__B
, (__v4di
) __C
,
417 __D
, (__v4di
) _mm256_setzero_si256 (), (__mmask8
)__A
);
420 extern __inline __m256i
421 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
422 _mm256_shldi_epi64 (__m256i __A
, __m256i __B
, int __C
)
424 return (__m256i
) __builtin_ia32_vpshld_v4di ((__v4di
)__A
, (__v4di
) __B
, __C
);
427 extern __inline __m128i
428 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
429 _mm_mask_shldi_epi16 (__m128i __A
, __mmask8 __B
, __m128i __C
, __m128i __D
,
432 return (__m128i
)__builtin_ia32_vpshld_v8hi_mask ((__v8hi
)__C
, (__v8hi
) __D
,
433 __E
, (__v8hi
) __A
, (__mmask8
)__B
);
436 extern __inline __m128i
437 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
438 _mm_maskz_shldi_epi16 (__mmask8 __A
, __m128i __B
, __m128i __C
, int __D
)
440 return (__m128i
)__builtin_ia32_vpshld_v8hi_mask ((__v8hi
)__B
, (__v8hi
) __C
,
441 __D
, (__v8hi
) _mm_setzero_si128 (), (__mmask8
)__A
);
444 extern __inline __m128i
445 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
446 _mm_shldi_epi16 (__m128i __A
, __m128i __B
, int __C
)
448 return (__m128i
) __builtin_ia32_vpshld_v8hi ((__v8hi
)__A
, (__v8hi
) __B
, __C
);
451 extern __inline __m128i
452 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
453 _mm_mask_shldi_epi32 (__m128i __A
, __mmask8 __B
, __m128i __C
, __m128i __D
,
456 return (__m128i
)__builtin_ia32_vpshld_v4si_mask ((__v4si
)__C
, (__v4si
) __D
,
457 __E
, (__v4si
) __A
, (__mmask8
)__B
);
460 extern __inline __m128i
461 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
462 _mm_maskz_shldi_epi32 (__mmask8 __A
, __m128i __B
, __m128i __C
, int __D
)
464 return (__m128i
)__builtin_ia32_vpshld_v4si_mask ((__v4si
)__B
, (__v4si
) __C
,
465 __D
, (__v4si
) _mm_setzero_si128 (), (__mmask8
)__A
);
468 extern __inline __m128i
469 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
470 _mm_shldi_epi32 (__m128i __A
, __m128i __B
, int __C
)
472 return (__m128i
) __builtin_ia32_vpshld_v4si ((__v4si
)__A
, (__v4si
) __B
, __C
);
475 extern __inline __m128i
476 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
477 _mm_mask_shldi_epi64 (__m128i __A
, __mmask8 __B
, __m128i __C
, __m128i __D
,
480 return (__m128i
)__builtin_ia32_vpshld_v2di_mask ((__v2di
)__C
, (__v2di
) __D
,
481 __E
, (__v2di
) __A
, (__mmask8
)__B
);
484 extern __inline __m128i
485 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
486 _mm_maskz_shldi_epi64 (__mmask8 __A
, __m128i __B
, __m128i __C
, int __D
)
488 return (__m128i
)__builtin_ia32_vpshld_v2di_mask ((__v2di
)__B
, (__v2di
) __C
,
489 __D
, (__v2di
) _mm_setzero_si128 (), (__mmask8
)__A
);
492 extern __inline __m128i
493 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
494 _mm_shldi_epi64 (__m128i __A
, __m128i __B
, int __C
)
496 return (__m128i
) __builtin_ia32_vpshld_v2di ((__v2di
)__A
, (__v2di
) __B
, __C
);
/* When not optimizing, the shift count cannot be proven to be an
   immediate inside an inline function, so macro forms that expand the
   builtin directly are used instead.  Fixes applied: every macro was
   missing its final closing parenthesis, and the _mm_mask_shrdi_epi32 /
   _mm_mask_shldi_epi32 macros cast the mask to __mmask16 although a
   4-element operation takes an 8-bit mask (the inline functions above
   use __mmask8).  */
#ifndef __OPTIMIZE__
#define _mm256_shrdi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi ((__v16hi)(__m256i)(A), \
					  (__v16hi)(__m256i)(B), (int)(C)))
#define _mm256_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(C), \
	(__v16hi)(__m256i)(D), (int)(E), (__v16hi)(__m256i)(A), \
	(__mmask16)(B)))
#define _mm256_maskz_shrdi_epi16(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshrd_v16hi_mask ((__v16hi)(__m256i)(B), \
	(__v16hi)(__m256i)(C), (int)(D), \
	(__v16hi)(__m256i) _mm256_setzero_si256 (), (__mmask16)(A)))
#define _mm256_shrdi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v8si ((__v8si)(__m256i)(A), \
					 (__v8si)(__m256i)(B), (int)(C)))
#define _mm256_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(C), \
	(__v8si)(__m256i)(D), (int)(E), (__v8si)(__m256i)(A), \
	(__mmask8)(B)))
#define _mm256_maskz_shrdi_epi32(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshrd_v8si_mask ((__v8si)(__m256i)(B), \
	(__v8si)(__m256i)(C), (int)(D), \
	(__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)(A)))
#define _mm256_shrdi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshrd_v4di ((__v4di)(__m256i)(A), \
					 (__v4di)(__m256i)(B), (int)(C)))
#define _mm256_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(C), \
	(__v4di)(__m256i)(D), (int)(E), (__v4di)(__m256i)(A), \
	(__mmask8)(B)))
#define _mm256_maskz_shrdi_epi64(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshrd_v4di_mask ((__v4di)(__m256i)(B), \
	(__v4di)(__m256i)(C), (int)(D), \
	(__v4di)(__m256i) _mm256_setzero_si256 (), (__mmask8)(A)))
#define _mm_shrdi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi ((__v8hi)(__m128i)(A), \
					 (__v8hi)(__m128i)(B), (int)(C)))
#define _mm_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(C), \
	(__v8hi)(__m128i)(D), (int)(E), (__v8hi)(__m128i)(A), \
	(__mmask8)(B)))
#define _mm_maskz_shrdi_epi16(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshrd_v8hi_mask ((__v8hi)(__m128i)(B), \
	(__v8hi)(__m128i)(C), (int)(D), \
	(__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8)(A)))
#define _mm_shrdi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v4si ((__v4si)(__m128i)(A), \
					 (__v4si)(__m128i)(B), (int)(C)))
#define _mm_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A), \
	(__mmask8)(B)))
#define _mm_maskz_shrdi_epi32(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
	(__v4si)(__m128i)(C), (int)(D), \
	(__v4si)(__m128i) _mm_setzero_si128 (), (__mmask8)(A)))
#define _mm_shrdi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshrd_v2di ((__v2di)(__m128i)(A), \
					 (__v2di)(__m128i)(B), (int)(C)))
#define _mm_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(C), \
	(__v2di)(__m128i)(D), (int)(E), (__v2di)(__m128i)(A), \
	(__mmask8)(B)))
#define _mm_maskz_shrdi_epi64(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshrd_v2di_mask ((__v2di)(__m128i)(B), \
	(__v2di)(__m128i)(C), (int)(D), \
	(__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)(A)))
#define _mm256_shldi_epi16(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v16hi ((__v16hi)(__m256i)(A), \
					  (__v16hi)(__m256i)(B), (int)(C)))
#define _mm256_mask_shldi_epi16(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(C), \
	(__v16hi)(__m256i)(D), (int)(E), (__v16hi)(__m256i)(A), \
	(__mmask16)(B)))
#define _mm256_maskz_shldi_epi16(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshld_v16hi_mask ((__v16hi)(__m256i)(B), \
	(__v16hi)(__m256i)(C), (int)(D), \
	(__v16hi)(__m256i) _mm256_setzero_si256 (), (__mmask16)(A)))
#define _mm256_shldi_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v8si ((__v8si)(__m256i)(A), \
					 (__v8si)(__m256i)(B), (int)(C)))
#define _mm256_mask_shldi_epi32(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(C), \
	(__v8si)(__m256i)(D), (int)(E), (__v8si)(__m256i)(A), \
	(__mmask8)(B)))
#define _mm256_maskz_shldi_epi32(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshld_v8si_mask ((__v8si)(__m256i)(B), \
	(__v8si)(__m256i)(C), (int)(D), \
	(__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)(A)))
#define _mm256_shldi_epi64(A, B, C) \
  ((__m256i) __builtin_ia32_vpshld_v4di ((__v4di)(__m256i)(A), \
					 (__v4di)(__m256i)(B), (int)(C)))
#define _mm256_mask_shldi_epi64(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(C), \
	(__v4di)(__m256i)(D), (int)(E), (__v4di)(__m256i)(A), \
	(__mmask8)(B)))
#define _mm256_maskz_shldi_epi64(A, B, C, D) \
  ((__m256i) __builtin_ia32_vpshld_v4di_mask ((__v4di)(__m256i)(B), \
	(__v4di)(__m256i)(C), (int)(D), \
	(__v4di)(__m256i) _mm256_setzero_si256 (), (__mmask8)(A)))
#define _mm_shldi_epi16(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v8hi ((__v8hi)(__m128i)(A), \
					 (__v8hi)(__m128i)(B), (int)(C)))
#define _mm_mask_shldi_epi16(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(C), \
	(__v8hi)(__m128i)(D), (int)(E), (__v8hi)(__m128i)(A), \
	(__mmask8)(B)))
#define _mm_maskz_shldi_epi16(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshld_v8hi_mask ((__v8hi)(__m128i)(B), \
	(__v8hi)(__m128i)(C), (int)(D), \
	(__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8)(A)))
#define _mm_shldi_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v4si ((__v4si)(__m128i)(A), \
					 (__v4si)(__m128i)(B), (int)(C)))
#define _mm_mask_shldi_epi32(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
	(__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A), \
	(__mmask8)(B)))
#define _mm_maskz_shldi_epi32(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \
	(__v4si)(__m128i)(C), (int)(D), \
	(__v4si)(__m128i) _mm_setzero_si128 (), (__mmask8)(A)))
#define _mm_shldi_epi64(A, B, C) \
  ((__m128i) __builtin_ia32_vpshld_v2di ((__v2di)(__m128i)(A), \
					 (__v2di)(__m128i)(B), (int)(C)))
#define _mm_mask_shldi_epi64(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(C), \
	(__v2di)(__m128i)(D), (int)(E), (__v2di)(__m128i)(A), \
	(__mmask8)(B)))
#define _mm_maskz_shldi_epi64(A, B, C, D) \
  ((__m128i) __builtin_ia32_vpshld_v2di_mask ((__v2di)(__m128i)(B), \
	(__v2di)(__m128i)(C), (int)(D), \
	(__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)(A)))
#endif /* __OPTIMIZE__ */
621 #ifdef __DISABLE_AVX512VBMI2VL__
622 #undef __DISABLE_AVX512VBMI2VL__
623 #pragma GCC pop_options
624 #endif /* __DISABLE_AVX512VBMIVL__ */
626 #if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || \
627 !defined(__AVX512BW__)
628 #pragma GCC push_options
629 #pragma GCC target("avx512vbmi2,avx512vl,avx512bw")
630 #define __DISABLE_AVX512VBMI2VLBW__
631 #endif /* __AVX512VBMIVLBW__ */
633 extern __inline __m256i
634 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
635 _mm256_mask_compress_epi8 (__m256i __A
, __mmask32 __B
, __m256i __C
)
637 return (__m256i
) __builtin_ia32_compressqi256_mask ((__v32qi
)__C
,
638 (__v32qi
)__A
, (__mmask32
)__B
);
641 extern __inline __m256i
642 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
643 _mm256_maskz_compress_epi8 (__mmask32 __A
, __m256i __B
)
645 return (__m256i
) __builtin_ia32_compressqi256_mask ((__v32qi
) __B
,
646 (__v32qi
) _mm256_setzero_si256 (), (__mmask32
) __A
);
650 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
651 _mm256_mask_compressstoreu_epi8 (void * __A
, __mmask32 __B
, __m256i __C
)
653 __builtin_ia32_compressstoreuqi256_mask ((__v32qi
*) __A
, (__v32qi
) __C
,
657 extern __inline __m256i
658 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
659 _mm256_mask_expand_epi8 (__m256i __A
, __mmask32 __B
, __m256i __C
)
661 return (__m256i
) __builtin_ia32_expandqi256_mask ((__v32qi
) __C
,
666 extern __inline __m256i
667 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
668 _mm256_maskz_expand_epi8 (__mmask32 __A
, __m256i __B
)
670 return (__m256i
) __builtin_ia32_expandqi256_maskz ((__v32qi
) __B
,
671 (__v32qi
) _mm256_setzero_si256 (), (__mmask32
) __A
);
674 extern __inline __m256i
675 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
676 _mm256_mask_expandloadu_epi8 (__m256i __A
, __mmask32 __B
, const void * __C
)
678 return (__m256i
) __builtin_ia32_expandloadqi256_mask ((const __v32qi
*) __C
,
679 (__v32qi
) __A
, (__mmask32
) __B
);
682 extern __inline __m256i
683 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
684 _mm256_maskz_expandloadu_epi8 (__mmask32 __A
, const void * __B
)
686 return (__m256i
) __builtin_ia32_expandloadqi256_maskz ((const __v32qi
*) __B
,
687 (__v32qi
) _mm256_setzero_si256 (), (__mmask32
) __A
);
690 #ifdef __DISABLE_AVX512VBMI2VLBW__
691 #undef __DISABLE_AVX512VBMI2VLBW__
692 #pragma GCC pop_options
693 #endif /* __DISABLE_AVX512VBMIVLBW__ */
#endif /* _AVX512VBMI2VLINTRIN_H_INCLUDED */