]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/avx512vlbwintrin.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / avx512vlbwintrin.h
1 /* Copyright (C) 2014-2024 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512VLBWINTRIN_H_INCLUDED
29 #define _AVX512VLBWINTRIN_H_INCLUDED
30
31 #if !defined(__AVX512VL__) || !defined(__AVX512BW__) || defined (__EVEX512__)
32 #pragma GCC push_options
33 #pragma GCC target("avx512vl,avx512bw,no-evex512")
34 #define __DISABLE_AVX512VLBW__
35 #endif /* __AVX512VLBW__ */
36
/* Internal data types for implementing the intrinsics.  Each one is a
   vector of short or char, 32 or 16 bytes wide, declared with
   __may_alias__ and __aligned__ (1).  */
typedef short __v16hi_u
  __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
typedef short __v8hi_u
  __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef char __v32qi_u
  __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
typedef char __v16qi_u
  __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
46
47 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
48 _mm_avx512_set1_epi32 (int __A)
49 {
50 return _mm_avx512_set_epi32 (__A, __A, __A, __A);
51 }
52
53 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
54 _mm_avx512_set1_epi16 (short __A)
55 {
56 return _mm_avx512_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
57 }
58
59 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
60 _mm_avx512_set1_epi8 (char __A)
61 {
62 return _mm_avx512_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
63 __A, __A, __A, __A, __A, __A, __A, __A);
64 }
65
66 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
67 _mm256_avx512_set_epi16 (short __q15, short __q14, short __q13, short __q12,
68 short __q11, short __q10, short __q09, short __q08,
69 short __q07, short __q06, short __q05, short __q04,
70 short __q03, short __q02, short __q01, short __q00)
71 {
72 return __extension__ (__m256i)(__v16hi){
73 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
74 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
75 };
76 }
77
78 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
79 _mm256_avx512_set_epi8 (char __q31, char __q30, char __q29, char __q28,
80 char __q27, char __q26, char __q25, char __q24,
81 char __q23, char __q22, char __q21, char __q20,
82 char __q19, char __q18, char __q17, char __q16,
83 char __q15, char __q14, char __q13, char __q12,
84 char __q11, char __q10, char __q09, char __q08,
85 char __q07, char __q06, char __q05, char __q04,
86 char __q03, char __q02, char __q01, char __q00)
87 {
88 return __extension__ (__m256i)(__v32qi){
89 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
90 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
91 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
92 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
93 };
94 }
95
96 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
97 _mm256_avx512_set1_epi16 (short __A)
98 {
99 return _mm256_avx512_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
100 __A, __A, __A, __A, __A, __A, __A, __A);
101 }
102
103 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
104 _mm256_avx512_set1_epi32 (int __A)
105 {
106 return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
107 __A, __A, __A, __A };
108 }
109
110 extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
111 _mm256_avx512_set1_epi8 (char __A)
112 {
113 return _mm256_avx512_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
114 __A, __A, __A, __A, __A, __A, __A, __A,
115 __A, __A, __A, __A, __A, __A, __A, __A,
116 __A, __A, __A, __A, __A, __A, __A, __A);
117 }
118
119 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120 _mm_avx512_max_epi16 (__m128i __A, __m128i __B)
121 {
122 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
123 }
124
125 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126 _mm_avx512_min_epi16 (__m128i __A, __m128i __B)
127 {
128 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
129 }
130
131 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
132 _mm_avx512_max_epu16 (__m128i __X, __m128i __Y)
133 {
134 return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
135 }
136
137 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
138 _mm_avx512_min_epu16 (__m128i __X, __m128i __Y)
139 {
140 return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
141 }
142
143 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
144 _mm_avx512_max_epi8 (__m128i __X, __m128i __Y)
145 {
146 return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
147 }
148
149 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
150 _mm_avx512_min_epi8 (__m128i __X, __m128i __Y)
151 {
152 return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
153 }
154
155 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
156 _mm_avx512_max_epu8 (__m128i __A, __m128i __B)
157 {
158 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
159 }
160
161 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
162 _mm_avx512_min_epu8 (__m128i __A, __m128i __B)
163 {
164 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
165 }
166
167 extern __inline __m256i
168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
169 _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
170 {
171 return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
172 (__v32qi) __W,
173 (__mmask32) __U);
174 }
175
176 extern __inline __m256i
177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
178 _mm256_avx512_max_epi16 (__m256i __A, __m256i __B)
179 {
180 return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B);
181 }
182
183 extern __inline __m256i
184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
185 _mm256_avx512_min_epi16 (__m256i __A, __m256i __B)
186 {
187 return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B);
188 }
189
190 extern __inline __m256i
191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192 _mm256_avx512_max_epu16 (__m256i __A, __m256i __B)
193 {
194 return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B);
195 }
196
197 extern __inline __m256i
198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199 _mm256_avx512_min_epu16 (__m256i __A, __m256i __B)
200 {
201 return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B);
202 }
203
/* 128-bit insert/extract helpers.  Each forwards its last argument
   unchanged to the underlying builtin.  With __OPTIMIZE__ defined they are
   always-inline functions; otherwise they are macros, so the argument is
   written directly into the builtin call.  */
#ifdef __OPTIMIZE__
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avx512_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
{
  __v8sf __Whole = (__v8sf) __X;
  __v4sf __Part = (__v4sf) __Y;
  return (__m256) __builtin_ia32_vinsertf128_ps256 (__Whole, __Part, __O);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avx512_extractf128_pd (__m256d __X, const int __N)
{
  __v4df __Whole = (__v4df) __X;
  return (__m128d) __builtin_ia32_vextractf128_pd256 (__Whole, __N);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avx512_extracti128_si256 (__m256i __X, const int __M)
{
  __v4di __Whole = (__v4di) __X;
  return (__m128i) __builtin_ia32_extract128i256 (__Whole, __M);
}
#else
#define _mm256_avx512_insertf128_ps(X, Y, O) \
  ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf) (__m256) (X), \
                                              (__v4sf) (__m128) (Y), \
                                              (int) (O)))

#define _mm256_avx512_extractf128_pd(X, N) \
  ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df) (__m256d) (X), \
                                                (int) (N)))

#define _mm256_avx512_extracti128_si256(X, M) \
  ((__m128i) __builtin_ia32_extract128i256 ((__v4di) (__m256i) (X), \
                                            (int) (M)))
#endif
238
/* Statement sequence that reduces the sixteen 16-bit elements of a
   variable named __W (which the expanding function must provide) with the
   binary operator OP.  The two 128-bit halves are combined first; the
   upper part of each partial result is then shuffled down and combined
   again, three times, so that element 0 holds the final value.  The
   expansion ends with a return statement and no trailing semicolon.  */
#define _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16(op) \
  __v8hi __Half0 = (__v8hi) _mm256_avx512_extracti128_si256 (__W, 0); \
  __v8hi __Half1 = (__v8hi) _mm256_avx512_extracti128_si256 (__W, 1); \
  __v8hi __Acc8 = __Half0 op __Half1; \
  __v8hi __Up4 \
    = __builtin_shufflevector (__Acc8, __Acc8, 4, 5, 6, 7, 4, 5, 6, 7); \
  __v8hi __Acc4 = __Acc8 op __Up4; \
  __v8hi __Up2 \
    = __builtin_shufflevector (__Acc4, __Acc4, 2, 3, 2, 3, 4, 5, 6, 7); \
  __v8hi __Acc2 = __Acc4 op __Up2; \
  __v8hi __Up1 \
    = __builtin_shufflevector (__Acc2, __Acc2, 1, 1, 2, 3, 4, 5, 6, 7); \
  __v8hi __Acc1 = __Acc2 op __Up1; \
  return __Acc1[0]
250
/* Statement sequence that reduces the sixteen 16-bit elements of a
   variable named __V (which the expanding function must provide) using
   the helper _mm_avx512_<OP>.  The two 128-bit halves are combined first;
   the upper part of each partial result is then shuffled down and
   combined again, three times, so that element 0 holds the final value.
   The expansion ends with a return statement and no trailing
   semicolon.  */
#define _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16(op) \
  __m128i __Half0 = _mm256_avx512_extracti128_si256 (__V, 0); \
  __m128i __Half1 = _mm256_avx512_extracti128_si256 (__V, 1); \
  __m128i __Acc8 = _mm_avx512_##op (__Half0, __Half1); \
  __m128i __Up4 \
    = (__m128i) __builtin_shufflevector ((__v8hi) __Acc8, (__v8hi) __Acc8, \
                                         4, 5, 6, 7, 4, 5, 6, 7); \
  __m128i __Acc4 = _mm_avx512_##op (__Acc8, __Up4); \
  __m128i __Up2 \
    = (__m128i) __builtin_shufflevector ((__v8hi) __Acc4, (__v8hi) __Acc4, \
                                         2, 3, 2, 3, 4, 5, 6, 7); \
  __m128i __Acc2 = _mm_avx512_##op (__Acc4, __Up2); \
  __m128i __Up1 \
    = (__m128i) __builtin_shufflevector ((__v8hi) __Acc2, (__v8hi) __Acc2, \
                                         1, 1, 2, 3, 4, 5, 6, 7); \
  __v8hi __Acc1 = (__v8hi) _mm_avx512_##op (__Acc2, __Up1); \
  return __Acc1[0]
265
/* Statement sequence that reduces the thirty-two 8-bit elements of a
   variable named __W (which the expanding function must provide) with the
   binary operator OP.  The two 128-bit halves are combined first; the
   upper part of each partial result is then shuffled down and combined
   again, four times, so that element 0 holds the final value.  The
   expansion ends with a return statement and no trailing semicolon.  */
#define _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8(op) \
  __v16qi __Half0 = (__v16qi) _mm256_avx512_extracti128_si256 (__W, 0); \
  __v16qi __Half1 = (__v16qi) _mm256_avx512_extracti128_si256 (__W, 1); \
  __v16qi __Acc16 = __Half0 op __Half1; \
  __v16qi __Up8 \
    = __builtin_shufflevector (__Acc16, __Acc16, \
                               8, 9, 10, 11, 12, 13, 14, 15, \
                               8, 9, 10, 11, 12, 13, 14, 15); \
  __v16qi __Acc8 = __Acc16 op __Up8; \
  __v16qi __Up4 \
    = __builtin_shufflevector (__Acc8, __Acc8, \
                               4, 5, 6, 7, 4, 5, 6, 7, \
                               8, 9, 10, 11, 12, 13, 14, 15); \
  __v16qi __Acc4 = __Acc8 op __Up4; \
  __v16qi __Up2 \
    = __builtin_shufflevector (__Acc4, __Acc4, \
                               2, 3, 2, 3, 4, 5, 6, 7, \
                               8, 9, 10, 11, 12, 13, 14, 15); \
  __v16qi __Acc2 = __Acc4 op __Up2; \
  __v16qi __Up1 \
    = __builtin_shufflevector (__Acc2, __Acc2, \
                               1, 1, 2, 3, 4, 5, 6, 7, \
                               8, 9, 10, 11, 12, 13, 14, 15); \
  __v16qi __Acc1 = __Acc2 op __Up1; \
  return __Acc1[0]
283
/* Statement sequence that reduces the thirty-two 8-bit elements of a
   variable named __V (which the expanding function must provide) using
   the helper _mm_avx512_<OP>.  The two 128-bit halves are combined first;
   the upper part of each partial result is then shuffled down and
   combined again, four times, so that element 0 holds the final value.
   The expansion ends with a return statement and no trailing semicolon.

   Every shuffle passes the same vector as both sources.  All indices are
   below 16, so only the first source is ever selected from; the second
   source of the __T8 shuffle is __T7 to match the other steps.  */
#define _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8(op) \
  __m128i __T1 = _mm256_avx512_extracti128_si256 (__V, 0); \
  __m128i __T2 = _mm256_avx512_extracti128_si256 (__V, 1); \
  __m128i __T3 = _mm_avx512_##op (__T1, __T2); \
  __m128i __T4 = (__m128i)__builtin_shufflevector ((__v16qi)__T3, \
                  (__v16qi)__T3, \
                  8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \
  __m128i __T5 = _mm_avx512_##op (__T3, __T4); \
  __m128i __T6 = (__m128i)__builtin_shufflevector ((__v16qi)__T5, \
                  (__v16qi)__T5, \
                  4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
  __m128i __T7 = _mm_avx512_##op (__T5, __T6); \
  __m128i __T8 = (__m128i)__builtin_shufflevector ((__v16qi)__T7, \
                  (__v16qi)__T7, \
                  2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
  __m128i __T9 = _mm_avx512_##op (__T7, __T8); \
  __m128i __T10 = (__m128i)__builtin_shufflevector ((__v16qi)__T9, \
                  (__v16qi)__T9, \
                  1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
  __v16qi __T11 = (__v16qi)_mm_avx512_##op (__T9, __T10); \
  return __T11[0]
305
306 extern __inline __m256i
307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308 _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
309 {
310 return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
311 (__v32qi)
312 _mm256_avx512_setzero_si256 (),
313 (__mmask32) __U);
314 }
315
316 extern __inline __m128i
317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318 _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
319 {
320 return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
321 (__v16qi) __W,
322 (__mmask16) __U);
323 }
324
325 extern __inline __m128i
326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327 _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
328 {
329 return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
330 (__v16qi)
331 _mm_avx512_setzero_si128 (),
332 (__mmask16) __U);
333 }
334
335 extern __inline void
336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337 _mm256_storeu_epi8 (void *__P, __m256i __A)
338 {
339 *(__v32qi_u *) __P = (__v32qi_u) __A;
340 }
341
342 extern __inline void
343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
344 _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
345 {
346 __builtin_ia32_storedquqi256_mask ((char *) __P,
347 (__v32qi) __A,
348 (__mmask32) __U);
349 }
350
351 extern __inline void
352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353 _mm_storeu_epi8 (void *__P, __m128i __A)
354 {
355 *(__v16qi_u *) __P = (__v16qi_u) __A;
356 }
357
358 extern __inline void
359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360 _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
361 {
362 __builtin_ia32_storedquqi128_mask ((char *) __P,
363 (__v16qi) __A,
364 (__mmask16) __U);
365 }
366
367 extern __inline __m256i
368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
369 _mm256_loadu_epi16 (void const *__P)
370 {
371 return (__m256i) (*(__v16hi_u *) __P);
372 }
373
374 extern __inline __m256i
375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
376 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
377 {
378 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
379 (__v16hi) __W,
380 (__mmask16) __U);
381 }
382
383 extern __inline __m256i
384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
385 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
386 {
387 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
388 (__v16hi)
389 _mm256_avx512_setzero_si256 (),
390 (__mmask16) __U);
391 }
392
393 extern __inline __m128i
394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395 _mm_loadu_epi16 (void const *__P)
396 {
397 return (__m128i) (*(__v8hi_u *) __P);
398 }
399
400 extern __inline __m128i
401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
403 {
404 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
405 (__v8hi) __W,
406 (__mmask8) __U);
407 }
408
409 extern __inline __m128i
410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
411 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
412 {
413 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
414 (__v8hi)
415 _mm_avx512_setzero_si128 (),
416 (__mmask8) __U);
417 }
418
419
420 extern __inline __m256i
421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
422 _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
423 {
424 return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
425 (__v16hi) __W,
426 (__mmask16) __U);
427 }
428
429 extern __inline __m256i
430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
431 _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
432 {
433 return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
434 (__v16hi)
435 _mm256_avx512_setzero_si256 (),
436 (__mmask16) __U);
437 }
438
439 extern __inline __m128i
440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
441 _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
442 {
443 return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
444 (__v8hi) __W,
445 (__mmask8) __U);
446 }
447
448 extern __inline __m128i
449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
450 _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
451 {
452 return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
453 (__v8hi)
454 _mm_avx512_setzero_si128 (),
455 (__mmask8) __U);
456 }
457
458 extern __inline __m256i
459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
460 _mm256_loadu_epi8 (void const *__P)
461 {
462 return (__m256i) (*(__v32qi_u *) __P);
463 }
464
465 extern __inline __m256i
466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467 _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
468 {
469 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
470 (__v32qi) __W,
471 (__mmask32) __U);
472 }
473
474 extern __inline __m256i
475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
476 _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
477 {
478 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
479 (__v32qi)
480 _mm256_avx512_setzero_si256 (),
481 (__mmask32) __U);
482 }
483
484 extern __inline __m128i
485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486 _mm_loadu_epi8 (void const *__P)
487 {
488 return (__m128i) (*(__v16qi_u *) __P);
489 }
490
491 extern __inline __m128i
492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
493 _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
494 {
495 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
496 (__v16qi) __W,
497 (__mmask16) __U);
498 }
499
500 extern __inline __m128i
501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502 _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
503 {
504 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
505 (__v16qi)
506 _mm_avx512_setzero_si128 (),
507 (__mmask16) __U);
508 }
509
510 extern __inline __m128i
511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512 _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
513 {
514 return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
515 (__v8hi) __W,
516 (__mmask8) __U);
517 }
518
519 extern __inline __m128i
520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521 _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
522 {
523 return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
524 (__v16qi) __W,
525 (__mmask16) __U);
526 }
527
528 extern __inline __m256i
529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
530 _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
531 {
532 return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
533 (__v16hi) __W,
534 (__mmask16) __U);
535 }
536
537 extern __inline __m256i
538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539 _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
540 {
541 return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
542 (__v32qi) __W,
543 (__mmask32) __U);
544 }
545
546 extern __inline __m128i
547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
548 _mm256_cvtepi16_epi8 (__m256i __A)
549 {
550
551 return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
552 (__v16qi)_mm_avx512_undefined_si128(),
553 (__mmask16) -1);
554 }
555
556 extern __inline void
557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
558 _mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
559 {
560 __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
561 }
562
563 extern __inline __m128i
564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
565 _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
566 {
567 return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
568 (__v16qi) __O, __M);
569 }
570
571 extern __inline __m128i
572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
573 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A)
574 {
575 return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
576 (__v16qi)
577 _mm_avx512_setzero_si128 (),
578 __M);
579 }
580
581 extern __inline __m128i
582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
583 _mm_cvtsepi16_epi8 (__m128i __A)
584 {
585
586 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
587 (__v16qi)_mm_avx512_undefined_si128(),
588 (__mmask8) -1);
589 }
590
591 extern __inline void
592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
593 _mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
594 {
595 __builtin_ia32_pmovswb128mem_mask ((unsigned long long *) __P , (__v8hi) __A, __M);
596 }
597
598 extern __inline __m128i
599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600 _mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
601 {
602 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
603 (__v16qi) __O, __M);
604 }
605
606 extern __inline __m128i
607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
608 _mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A)
609 {
610 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
611 (__v16qi)
612 _mm_avx512_setzero_si128 (),
613 __M);
614 }
615
616 extern __inline __m128i
617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618 _mm256_cvtsepi16_epi8 (__m256i __A)
619 {
620
621 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
622 (__v16qi)_mm_avx512_undefined_si128(),
623 (__mmask16) -1);
624 }
625
626 extern __inline void
627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
628 _mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
629 {
630 __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
631 }
632
633 extern __inline __m128i
634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
635 _mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
636 {
637 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
638 (__v16qi) __O, __M);
639 }
640
641 extern __inline __m128i
642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
643 _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A)
644 {
645 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
646 (__v16qi)
647 _mm_avx512_setzero_si128 (),
648 __M);
649 }
650
651 extern __inline __m128i
652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
653 _mm_cvtusepi16_epi8 (__m128i __A)
654 {
655
656 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
657 (__v16qi)_mm_avx512_undefined_si128(),
658 (__mmask8) -1);
659 }
660
661 extern __inline void
662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
663 _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
664 {
665 __builtin_ia32_pmovuswb128mem_mask ((unsigned long long *) __P , (__v8hi) __A, __M);
666 }
667
668 extern __inline __m128i
669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
670 _mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
671 {
672 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
673 (__v16qi) __O,
674 __M);
675 }
676
677 extern __inline __m128i
678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
679 _mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A)
680 {
681 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
682 (__v16qi)
683 _mm_avx512_setzero_si128 (),
684 __M);
685 }
686
687 extern __inline __m128i
688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689 _mm256_cvtusepi16_epi8 (__m256i __A)
690 {
691
692 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
693 (__v16qi)_mm_avx512_undefined_si128(),
694 (__mmask16) -1);
695 }
696
697 extern __inline void
698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699 _mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
700 {
701 __builtin_ia32_pmovuswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
702 }
703
704 extern __inline __m128i
705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
706 _mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
707 {
708 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
709 (__v16qi) __O,
710 __M);
711 }
712
713 extern __inline __m128i
714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
715 _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A)
716 {
717 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
718 (__v16qi)
719 _mm_avx512_setzero_si128 (),
720 __M);
721 }
722
723 extern __inline __m256i
724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
725 _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
726 {
727 return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
728 (__v32qi) __O,
729 __M);
730 }
731
732 extern __inline __m256i
733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
734 _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
735 {
736 return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
737 (__v32qi)
738 _mm256_avx512_setzero_si256 (),
739 __M);
740 }
741
742 extern __inline __m256i
743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
744 _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
745 {
746 return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
747 (__v32qi) __O,
748 __M);
749 }
750
751 extern __inline __m256i
752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
753 _mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
754 {
755 return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
756 (__v32qi)
757 _mm256_avx512_setzero_si256 (),
758 __M);
759 }
760
761 extern __inline __m128i
762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763 _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
764 {
765 return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
766 (__v16qi) __O,
767 __M);
768 }
769
770 extern __inline __m128i
771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
772 _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
773 {
774 return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
775 (__v16qi)
776 _mm_avx512_setzero_si128 (),
777 __M);
778 }
779
780 extern __inline __m128i
781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
782 _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
783 {
784 return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
785 (__v16qi) __O,
786 __M);
787 }
788
789 extern __inline __m128i
790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
791 _mm_maskz_set1_epi8 (__mmask16 __M, char __A)
792 {
793 return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
794 (__v16qi)
795 _mm_avx512_setzero_si128 (),
796 __M);
797 }
798
799 extern __inline __m256i
800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
801 _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
802 {
803 return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
804 (__v16hi) __O,
805 __M);
806 }
807
808 extern __inline __m256i
809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
810 _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
811 {
812 return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
813 (__v16hi)
814 _mm256_avx512_setzero_si256 (),
815 __M);
816 }
817
818 extern __inline __m256i
819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
820 _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
821 {
822 return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
823 (__v16hi) __O,
824 __M);
825 }
826
827 extern __inline __m256i
828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829 _mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
830 {
831 return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
832 (__v16hi)
833 _mm256_avx512_setzero_si256 (),
834 __M);
835 }
836
837 extern __inline __m128i
838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
839 _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
840 {
841 return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
842 (__v8hi) __O,
843 __M);
844 }
845
846 extern __inline __m128i
847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
848 _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
849 {
850 return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
851 (__v8hi)
852 _mm_avx512_setzero_si128 (),
853 __M);
854 }
855
856 extern __inline __m128i
857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
858 _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
859 {
860 return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
861 (__v8hi) __O,
862 __M);
863 }
864
865 extern __inline __m128i
866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
867 _mm_maskz_set1_epi16 (__mmask8 __M, short __A)
868 {
869 return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
870 (__v8hi)
871 _mm_avx512_setzero_si128 (),
872 __M);
873 }
874
875 extern __inline __m256i
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877 _mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
878 {
879 return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
880 (__v16hi) __A,
881 (__v16hi)
882 _mm256_avx512_setzero_si256 (),
883 (__mmask16) -1);
884 }
885
886 extern __inline __m256i
887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
888 _mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
889 __m256i __B)
890 {
891 return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
892 (__v16hi) __A,
893 (__v16hi)
894 _mm256_avx512_setzero_si256 (),
895 (__mmask16) __M);
896 }
897
898 extern __inline __m256i
899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900 _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
901 __m256i __B)
902 {
903 return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
904 (__v16hi) __A,
905 (__v16hi) __W,
906 (__mmask16) __M);
907 }
908
909 extern __inline __m128i
910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911 _mm_permutexvar_epi16 (__m128i __A, __m128i __B)
912 {
913 return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
914 (__v8hi) __A,
915 (__v8hi)
916 _mm_avx512_setzero_si128 (),
917 (__mmask8) -1);
918 }
919
920 extern __inline __m128i
921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
922 _mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
923 {
924 return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
925 (__v8hi) __A,
926 (__v8hi)
927 _mm_avx512_setzero_si128 (),
928 (__mmask8) __M);
929 }
930
931 extern __inline __m128i
932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
933 _mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
934 __m128i __B)
935 {
936 return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
937 (__v8hi) __A,
938 (__v8hi) __W,
939 (__mmask8) __M);
940 }
941
942 extern __inline __m256i
943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
944 _mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B)
945 {
946 return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
947 /* idx */ ,
948 (__v16hi) __A,
949 (__v16hi) __B,
950 (__mmask16) -1);
951 }
952
953 extern __inline __m256i
954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
955 _mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
956 __m256i __I, __m256i __B)
957 {
958 return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
959 /* idx */ ,
960 (__v16hi) __A,
961 (__v16hi) __B,
962 (__mmask16)
963 __U);
964 }
965
966 extern __inline __m256i
967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
968 _mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I,
969 __mmask16 __U, __m256i __B)
970 {
971 return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
972 (__v16hi) __I
973 /* idx */ ,
974 (__v16hi) __B,
975 (__mmask16)
976 __U);
977 }
978
979 extern __inline __m256i
980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
981 _mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
982 __m256i __I, __m256i __B)
983 {
984 return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I
985 /* idx */ ,
986 (__v16hi) __A,
987 (__v16hi) __B,
988 (__mmask16)
989 __U);
990 }
991
992 extern __inline __m128i
993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994 _mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B)
995 {
996 return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
997 /* idx */ ,
998 (__v8hi) __A,
999 (__v8hi) __B,
1000 (__mmask8) -1);
1001 }
1002
1003 extern __inline __m128i
1004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1005 _mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I,
1006 __m128i __B)
1007 {
1008 return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
1009 /* idx */ ,
1010 (__v8hi) __A,
1011 (__v8hi) __B,
1012 (__mmask8)
1013 __U);
1014 }
1015
1016 extern __inline __m128i
1017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1018 _mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U,
1019 __m128i __B)
1020 {
1021 return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
1022 (__v8hi) __I
1023 /* idx */ ,
1024 (__v8hi) __B,
1025 (__mmask8)
1026 __U);
1027 }
1028
1029 extern __inline __m128i
1030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1031 _mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
1032 __m128i __B)
1033 {
1034 return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I
1035 /* idx */ ,
1036 (__v8hi) __A,
1037 (__v8hi) __B,
1038 (__mmask8)
1039 __U);
1040 }
1041
1042 extern __inline __m256i
1043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1044 _mm256_mask_maddubs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
1045 __m256i __Y)
1046 {
1047 return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
1048 (__v32qi) __Y,
1049 (__v16hi) __W,
1050 (__mmask16) __U);
1051 }
1052
1053 extern __inline __m256i
1054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1055 _mm256_maskz_maddubs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
1056 {
1057 return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
1058 (__v32qi) __Y,
1059 (__v16hi)
1060 _mm256_avx512_setzero_si256 (),
1061 (__mmask16) __U);
1062 }
1063
1064 extern __inline __m128i
1065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1066 _mm_mask_maddubs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
1067 __m128i __Y)
1068 {
1069 return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
1070 (__v16qi) __Y,
1071 (__v8hi) __W,
1072 (__mmask8) __U);
1073 }
1074
1075 extern __inline __m128i
1076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1077 _mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
1078 {
1079 return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
1080 (__v16qi) __Y,
1081 (__v8hi)
1082 _mm_avx512_setzero_si128 (),
1083 (__mmask8) __U);
1084 }
1085
1086 extern __inline __m256i
1087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1088 _mm256_mask_madd_epi16 (__m256i __W, __mmask8 __U, __m256i __A,
1089 __m256i __B)
1090 {
1091 return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
1092 (__v16hi) __B,
1093 (__v8si) __W,
1094 (__mmask8) __U);
1095 }
1096
1097 extern __inline __m256i
1098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1099 _mm256_maskz_madd_epi16 (__mmask8 __U, __m256i __A, __m256i __B)
1100 {
1101 return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
1102 (__v16hi) __B,
1103 (__v8si)
1104 _mm256_avx512_setzero_si256 (),
1105 (__mmask8) __U);
1106 }
1107
1108 extern __inline __m128i
1109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1110 _mm_mask_madd_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1111 __m128i __B)
1112 {
1113 return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
1114 (__v8hi) __B,
1115 (__v4si) __W,
1116 (__mmask8) __U);
1117 }
1118
1119 extern __inline __m128i
1120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1121 _mm_maskz_madd_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
1122 {
1123 return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
1124 (__v8hi) __B,
1125 (__v4si)
1126 _mm_avx512_setzero_si128 (),
1127 (__mmask8) __U);
1128 }
1129
1130 extern __inline __mmask16
1131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1132 _mm_movepi8_mask (__m128i __A)
1133 {
1134 return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
1135 }
1136
1137 extern __inline __mmask32
1138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm256_movepi8_mask (__m256i __A)
1140 {
1141 return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
1142 }
1143
1144 extern __inline __mmask8
1145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1146 _mm_movepi16_mask (__m128i __A)
1147 {
1148 return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
1149 }
1150
1151 extern __inline __mmask16
1152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1153 _mm256_movepi16_mask (__m256i __A)
1154 {
1155 return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
1156 }
1157
1158 extern __inline __m128i
1159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1160 _mm_movm_epi8 (__mmask16 __A)
1161 {
1162 return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
1163 }
1164
1165 extern __inline __m256i
1166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1167 _mm256_movm_epi8 (__mmask32 __A)
1168 {
1169 return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
1170 }
1171
1172 extern __inline __m128i
1173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1174 _mm_movm_epi16 (__mmask8 __A)
1175 {
1176 return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
1177 }
1178
1179 extern __inline __m256i
1180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1181 _mm256_movm_epi16 (__mmask16 __A)
1182 {
1183 return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
1184 }
1185
1186 extern __inline __mmask16
1187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1188 _mm_test_epi8_mask (__m128i __A, __m128i __B)
1189 {
1190 return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
1191 (__v16qi) __B,
1192 (__mmask16) -1);
1193 }
1194
1195 extern __inline __mmask16
1196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197 _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
1198 {
1199 return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
1200 (__v16qi) __B, __U);
1201 }
1202
1203 extern __inline __mmask32
1204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1205 _mm256_test_epi8_mask (__m256i __A, __m256i __B)
1206 {
1207 return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
1208 (__v32qi) __B,
1209 (__mmask32) -1);
1210 }
1211
1212 extern __inline __mmask32
1213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1214 _mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
1215 {
1216 return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
1217 (__v32qi) __B, __U);
1218 }
1219
1220 extern __inline __mmask8
1221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1222 _mm_test_epi16_mask (__m128i __A, __m128i __B)
1223 {
1224 return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
1225 (__v8hi) __B,
1226 (__mmask8) -1);
1227 }
1228
1229 extern __inline __mmask8
1230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231 _mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
1232 {
1233 return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
1234 (__v8hi) __B, __U);
1235 }
1236
1237 extern __inline __mmask16
1238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1239 _mm256_test_epi16_mask (__m256i __A, __m256i __B)
1240 {
1241 return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
1242 (__v16hi) __B,
1243 (__mmask16) -1);
1244 }
1245
1246 extern __inline __mmask16
1247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1248 _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
1249 {
1250 return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
1251 (__v16hi) __B, __U);
1252 }
1253
1254 extern __inline __m256i
1255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1256 _mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
1257 {
1258 return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
1259 (__v16hi) __B,
1260 (__v16hi)
1261 _mm256_avx512_setzero_si256 (),
1262 (__mmask16) __M);
1263 }
1264
1265 extern __inline __m256i
1266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1267 _mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
1268 __m256i __B)
1269 {
1270 return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
1271 (__v16hi) __B,
1272 (__v16hi) __W,
1273 (__mmask16) __M);
1274 }
1275
1276 extern __inline __m128i
1277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1278 _mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
1279 {
1280 return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
1281 (__v8hi) __B,
1282 (__v8hi)
1283 _mm_avx512_setzero_si128 (),
1284 (__mmask8) __M);
1285 }
1286
1287 extern __inline __m128i
1288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1289 _mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
1290 __m128i __B)
1291 {
1292 return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
1293 (__v8hi) __B,
1294 (__v8hi) __W,
1295 (__mmask8) __M);
1296 }
1297
1298 extern __inline __m256i
1299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1300 _mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
1301 {
1302 return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
1303 (__v16hi) __B,
1304 (__v16hi)
1305 _mm256_avx512_setzero_si256 (),
1306 (__mmask16) __M);
1307 }
1308
1309 extern __inline __m256i
1310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1311 _mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
1312 __m256i __B)
1313 {
1314 return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
1315 (__v16hi) __B,
1316 (__v16hi) __W,
1317 (__mmask16) __M);
1318 }
1319
1320 extern __inline __m256i
1321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1322 _mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1323 {
1324 return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
1325 (__v32qi) __B,
1326 (__v32qi)
1327 _mm256_avx512_setzero_si256 (),
1328 (__mmask32) __M);
1329 }
1330
1331 extern __inline __m256i
1332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333 _mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
1334 __m256i __B)
1335 {
1336 return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
1337 (__v32qi) __B,
1338 (__v32qi) __W,
1339 (__mmask32) __M);
1340 }
1341
1342 extern __inline __m128i
1343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1344 _mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
1345 {
1346 return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
1347 (__v16qi) __B,
1348 (__v16qi)
1349 _mm_avx512_setzero_si128 (),
1350 (__mmask16) __M);
1351 }
1352
1353 extern __inline __m128i
1354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355 _mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
1356 __m128i __B)
1357 {
1358 return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
1359 (__v16qi) __B,
1360 (__v16qi) __W,
1361 (__mmask16) __M);
1362 }
1363
1364 extern __inline __m256i
1365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366 _mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
1367 {
1368 return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
1369 (__v32qi) __B,
1370 (__v32qi)
1371 _mm256_avx512_setzero_si256 (),
1372 (__mmask32) __M);
1373 }
1374
1375 extern __inline __m256i
1376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1377 _mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
1378 __m256i __B)
1379 {
1380 return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
1381 (__v32qi) __B,
1382 (__v32qi) __W,
1383 (__mmask32) __M);
1384 }
1385
1386 extern __inline __m128i
1387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
1389 {
1390 return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
1391 (__v16qi) __B,
1392 (__v16qi)
1393 _mm_avx512_setzero_si128 (),
1394 (__mmask16) __M);
1395 }
1396
1397 extern __inline __m128i
1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399 _mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
1400 __m128i __B)
1401 {
1402 return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
1403 (__v16qi) __B,
1404 (__v16qi) __W,
1405 (__mmask16) __M);
1406 }
1407
1408 extern __inline __m256i
1409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1410 _mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1411 {
1412 return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
1413 (__v32qi) __B,
1414 (__v32qi)
1415 _mm256_avx512_setzero_si256 (),
1416 (__mmask32) __M);
1417 }
1418
1419 extern __inline __m256i
1420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1421 _mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
1422 __m256i __B)
1423 {
1424 return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
1425 (__v32qi) __B,
1426 (__v32qi) __W,
1427 (__mmask32) __M);
1428 }
1429
1430 extern __inline __m128i
1431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432 _mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
1433 {
1434 return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
1435 (__v16qi) __B,
1436 (__v16qi)
1437 _mm_avx512_setzero_si128 (),
1438 (__mmask16) __M);
1439 }
1440
1441 extern __inline __m128i
1442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1443 _mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
1444 __m128i __B)
1445 {
1446 return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
1447 (__v16qi) __B,
1448 (__v16qi) __W,
1449 (__mmask16) __M);
1450 }
1451
1452 extern __inline __m256i
1453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1454 _mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
1455 {
1456 return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
1457 (__v32qi) __B,
1458 (__v32qi)
1459 _mm256_avx512_setzero_si256 (),
1460 (__mmask32) __M);
1461 }
1462
1463 extern __inline __m256i
1464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465 _mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
1466 __m256i __B)
1467 {
1468 return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
1469 (__v32qi) __B,
1470 (__v32qi) __W,
1471 (__mmask32) __M);
1472 }
1473
1474 extern __inline __m128i
1475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1476 _mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
1477 {
1478 return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
1479 (__v16qi) __B,
1480 (__v16qi)
1481 _mm_avx512_setzero_si128 (),
1482 (__mmask16) __M);
1483 }
1484
1485 extern __inline __m128i
1486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1487 _mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
1488 __m128i __B)
1489 {
1490 return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
1491 (__v16qi) __B,
1492 (__v16qi) __W,
1493 (__mmask16) __M);
1494 }
1495
1496 extern __inline __m256i
1497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1498 _mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
1499 {
1500 return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
1501 (__v16hi) __B,
1502 (__v16hi)
1503 _mm256_avx512_setzero_si256 (),
1504 (__mmask16) __M);
1505 }
1506
1507 extern __inline __m256i
1508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1509 _mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
1510 __m256i __B)
1511 {
1512 return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
1513 (__v16hi) __B,
1514 (__v16hi) __W,
1515 (__mmask16) __M);
1516 }
1517
1518 extern __inline __m128i
1519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1520 _mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
1521 {
1522 return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
1523 (__v8hi) __B,
1524 (__v8hi)
1525 _mm_avx512_setzero_si128 (),
1526 (__mmask8) __M);
1527 }
1528
1529 extern __inline __m128i
1530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1531 _mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
1532 __m128i __B)
1533 {
1534 return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
1535 (__v8hi) __B,
1536 (__v8hi) __W,
1537 (__mmask8) __M);
1538 }
1539
1540 extern __inline __m256i
1541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542 _mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
1543 {
1544 return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
1545 (__v16hi) __B,
1546 (__v16hi)
1547 _mm256_avx512_setzero_si256 (),
1548 (__mmask16) __M);
1549 }
1550
1551 extern __inline __m256i
1552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553 _mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
1554 __m256i __B)
1555 {
1556 return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
1557 (__v16hi) __B,
1558 (__v16hi) __W,
1559 (__mmask16) __M);
1560 }
1561
1562 extern __inline __m128i
1563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1564 _mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
1565 {
1566 return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
1567 (__v8hi) __B,
1568 (__v8hi)
1569 _mm_avx512_setzero_si128 (),
1570 (__mmask8) __M);
1571 }
1572
1573 extern __inline __m128i
1574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1575 _mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
1576 __m128i __B)
1577 {
1578 return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
1579 (__v8hi) __B,
1580 (__v8hi) __W,
1581 (__mmask8) __M);
1582 }
1583
1584 extern __inline __m128i
1585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1586 _mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
1587 {
1588 return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
1589 (__v8hi) __B,
1590 (__v8hi)
1591 _mm_avx512_setzero_si128 (),
1592 (__mmask8) __M);
1593 }
1594
1595 extern __inline __m128i
1596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1597 _mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
1598 __m128i __B)
1599 {
1600 return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
1601 (__v8hi) __B,
1602 (__v8hi) __W,
1603 (__mmask8) __M);
1604 }
1605
1606 #ifdef __OPTIMIZE__
1607 extern __inline __m256i
1608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1609 _mm256_mask_alignr_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
1610 __m256i __B, const int __N)
1611 {
1612 return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
1613 (__v4di) __B,
1614 __N * 8,
1615 (__v4di) __W,
1616 (__mmask32) __U);
1617 }
1618
1619 extern __inline __m256i
1620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1621 _mm256_maskz_alignr_epi8 (__mmask32 __U, __m256i __A, __m256i __B,
1622 const int __N)
1623 {
1624 return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
1625 (__v4di) __B,
1626 __N * 8,
1627 (__v4di)
1628 _mm256_avx512_setzero_si256 (),
1629 (__mmask32) __U);
1630 }
1631
1632 extern __inline __m128i
1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634 _mm_mask_alignr_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
1635 __m128i __B, const int __N)
1636 {
1637 return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
1638 (__v2di) __B,
1639 __N * 8,
1640 (__v2di) __W,
1641 (__mmask16) __U);
1642 }
1643
1644 extern __inline __m128i
1645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1646 _mm_maskz_alignr_epi8 (__mmask16 __U, __m128i __A, __m128i __B,
1647 const int __N)
1648 {
1649 return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
1650 (__v2di) __B,
1651 __N * 8,
1652 (__v2di)
1653 _mm_avx512_setzero_si128 (),
1654 (__mmask16) __U);
1655 }
1656
1657 extern __inline __m256i
1658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1659 _mm256_dbsad_epu8 (__m256i __A, __m256i __B, const int __imm)
1660 {
1661 return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1662 (__v32qi) __B,
1663 __imm,
1664 (__v16hi)
1665 _mm256_avx512_setzero_si256 (),
1666 (__mmask16) -1);
1667 }
1668
1669 extern __inline __m256i
1670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1671 _mm256_mask_dbsad_epu8 (__m256i __W, __mmask16 __U, __m256i __A,
1672 __m256i __B, const int __imm)
1673 {
1674 return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1675 (__v32qi) __B,
1676 __imm,
1677 (__v16hi) __W,
1678 (__mmask16) __U);
1679 }
1680
1681 extern __inline __m256i
1682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1683 _mm256_maskz_dbsad_epu8 (__mmask16 __U, __m256i __A, __m256i __B,
1684 const int __imm)
1685 {
1686 return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1687 (__v32qi) __B,
1688 __imm,
1689 (__v16hi)
1690 _mm256_avx512_setzero_si256 (),
1691 (__mmask16) __U);
1692 }
1693
1694 extern __inline __m128i
1695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1696 _mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm)
1697 {
1698 return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1699 (__v16qi) __B,
1700 __imm,
1701 (__v8hi)
1702 _mm_avx512_setzero_si128 (),
1703 (__mmask8) -1);
1704 }
1705
1706 extern __inline __m128i
1707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1708 _mm_mask_dbsad_epu8 (__m128i __W, __mmask8 __U, __m128i __A,
1709 __m128i __B, const int __imm)
1710 {
1711 return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1712 (__v16qi) __B,
1713 __imm,
1714 (__v8hi) __W,
1715 (__mmask8) __U);
1716 }
1717
1718 extern __inline __m128i
1719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720 _mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
1721 const int __imm)
1722 {
1723 return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1724 (__v16qi) __B,
1725 __imm,
1726 (__v8hi)
1727 _mm_avx512_setzero_si128 (),
1728 (__mmask8) __U);
1729 }
1730
1731 extern __inline __mmask8
1732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1733 _mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1734 const int __P)
1735 {
1736 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
1737 (__v8hi) __Y, __P,
1738 (__mmask8) __U);
1739 }
1740
1741 extern __inline __mmask8
1742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743 _mm_cmp_epi16_mask (__m128i __X, __m128i __Y, const int __P)
1744 {
1745 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
1746 (__v8hi) __Y, __P,
1747 (__mmask8) -1);
1748 }
1749
1750 extern __inline __mmask16
1751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752 _mm256_mask_cmp_epi16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1753 const int __P)
1754 {
1755 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
1756 (__v16hi) __Y, __P,
1757 (__mmask16) __U);
1758 }
1759
1760 extern __inline __mmask16
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 _mm256_cmp_epi16_mask (__m256i __X, __m256i __Y, const int __P)
1763 {
1764 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
1765 (__v16hi) __Y, __P,
1766 (__mmask16) -1);
1767 }
1768
1769 extern __inline __mmask16
1770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1771 _mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
1772 const int __P)
1773 {
1774 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
1775 (__v16qi) __Y, __P,
1776 (__mmask16) __U);
1777 }
1778
1779 extern __inline __mmask16
1780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1781 _mm_cmp_epi8_mask (__m128i __X, __m128i __Y, const int __P)
1782 {
1783 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
1784 (__v16qi) __Y, __P,
1785 (__mmask16) -1);
1786 }
1787
1788 extern __inline __mmask32
1789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1790 _mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
1791 const int __P)
1792 {
1793 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
1794 (__v32qi) __Y, __P,
1795 (__mmask32) __U);
1796 }
1797
1798 extern __inline __mmask32
1799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1800 _mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
1801 {
1802 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
1803 (__v32qi) __Y, __P,
1804 (__mmask32) -1);
1805 }
1806
1807 extern __inline __mmask8
1808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1809 _mm_mask_cmp_epu16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1810 const int __P)
1811 {
1812 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
1813 (__v8hi) __Y, __P,
1814 (__mmask8) __U);
1815 }
1816
1817 extern __inline __mmask8
1818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1819 _mm_cmp_epu16_mask (__m128i __X, __m128i __Y, const int __P)
1820 {
1821 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
1822 (__v8hi) __Y, __P,
1823 (__mmask8) -1);
1824 }
1825
1826 extern __inline __mmask16
1827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1828 _mm256_mask_cmp_epu16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1829 const int __P)
1830 {
1831 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
1832 (__v16hi) __Y, __P,
1833 (__mmask16) __U);
1834 }
1835
1836 extern __inline __mmask16
1837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1838 _mm256_cmp_epu16_mask (__m256i __X, __m256i __Y, const int __P)
1839 {
1840 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
1841 (__v16hi) __Y, __P,
1842 (__mmask16) -1);
1843 }
1844
1845 extern __inline __mmask16
1846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1847 _mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
1848 const int __P)
1849 {
1850 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
1851 (__v16qi) __Y, __P,
1852 (__mmask16) __U);
1853 }
1854
1855 extern __inline __mmask16
1856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1857 _mm_cmp_epu8_mask (__m128i __X, __m128i __Y, const int __P)
1858 {
1859 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
1860 (__v16qi) __Y, __P,
1861 (__mmask16) -1);
1862 }
1863
1864 extern __inline __mmask32
1865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1866 _mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
1867 const int __P)
1868 {
1869 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
1870 (__v32qi) __Y, __P,
1871 (__mmask32) __U);
1872 }
1873
1874 extern __inline __mmask32
1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876 _mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
1877 {
1878 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
1879 (__v32qi) __Y, __P,
1880 (__mmask32) -1);
1881 }
1882
1883 extern __inline __m256i
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _mm256_mask_srli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1886 const int __imm)
1887 {
1888 return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
1889 (__v16hi) __W,
1890 (__mmask16) __U);
1891 }
1892
1893 extern __inline __m256i
1894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1895 _mm256_maskz_srli_epi16 (__mmask16 __U, __m256i __A, const int __imm)
1896 {
1897 return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
1898 (__v16hi)
1899 _mm256_avx512_setzero_si256 (),
1900 (__mmask16) __U);
1901 }
1902
1903 extern __inline __m128i
1904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1905 _mm_mask_srli_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1906 const int __imm)
1907 {
1908 return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
1909 (__v8hi) __W,
1910 (__mmask8) __U);
1911 }
1912
1913 extern __inline __m128i
1914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1915 _mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1916 {
1917 return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
1918 (__v8hi)
1919 _mm_avx512_setzero_si128 (),
1920 (__mmask8) __U);
1921 }
1922
1923 extern __inline __m256i
1924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1925 _mm256_mask_shufflehi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1926 const int __imm)
1927 {
1928 return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
1929 __imm,
1930 (__v16hi) __W,
1931 (__mmask16) __U);
1932 }
1933
1934 extern __inline __m256i
1935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1936 _mm256_maskz_shufflehi_epi16 (__mmask16 __U, __m256i __A,
1937 const int __imm)
1938 {
1939 return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
1940 __imm,
1941 (__v16hi)
1942 _mm256_avx512_setzero_si256 (),
1943 (__mmask16) __U);
1944 }
1945
1946 extern __inline __m128i
1947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1948 _mm_mask_shufflehi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1949 const int __imm)
1950 {
1951 return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
1952 (__v8hi) __W,
1953 (__mmask8) __U);
1954 }
1955
1956 extern __inline __m128i
1957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1958 _mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1959 {
1960 return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
1961 (__v8hi)
1962 _mm_avx512_setzero_si128 (),
1963 (__mmask8) __U);
1964 }
1965
1966 extern __inline __m256i
1967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968 _mm256_mask_shufflelo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1969 const int __imm)
1970 {
1971 return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
1972 __imm,
1973 (__v16hi) __W,
1974 (__mmask16) __U);
1975 }
1976
1977 extern __inline __m256i
1978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1979 _mm256_maskz_shufflelo_epi16 (__mmask16 __U, __m256i __A,
1980 const int __imm)
1981 {
1982 return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
1983 __imm,
1984 (__v16hi)
1985 _mm256_avx512_setzero_si256 (),
1986 (__mmask16) __U);
1987 }
1988
1989 extern __inline __m128i
1990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1991 _mm_mask_shufflelo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1992 const int __imm)
1993 {
1994 return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
1995 (__v8hi) __W,
1996 (__mmask8) __U);
1997 }
1998
1999 extern __inline __m128i
2000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001 _mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm)
2002 {
2003 return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
2004 (__v8hi)
2005 _mm_avx512_setzero_si128 (),
2006 (__mmask8) __U);
2007 }
2008
2009 extern __inline __m256i
2010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011 _mm256_mask_srai_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2012 const unsigned int __imm)
2013 {
2014 return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
2015 (__v16hi) __W,
2016 (__mmask16) __U);
2017 }
2018
2019 extern __inline __m256i
2020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021 _mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const unsigned int __imm)
2022 {
2023 return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
2024 (__v16hi)
2025 _mm256_avx512_setzero_si256 (),
2026 (__mmask16) __U);
2027 }
2028
2029 extern __inline __m128i
2030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2031 _mm_mask_srai_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2032 const unsigned int __imm)
2033 {
2034 return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
2035 (__v8hi) __W,
2036 (__mmask8) __U);
2037 }
2038
2039 extern __inline __m128i
2040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2041 _mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const unsigned int __imm)
2042 {
2043 return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
2044 (__v8hi)
2045 _mm_avx512_setzero_si128 (),
2046 (__mmask8) __U);
2047 }
2048
2049 extern __inline __m256i
2050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2051 _mm256_mask_slli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2052 unsigned int __B)
2053 {
2054 return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
2055 (__v16hi) __W,
2056 (__mmask16) __U);
2057 }
2058
2059 extern __inline __m256i
2060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2061 _mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, unsigned int __B)
2062 {
2063 return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
2064 (__v16hi)
2065 _mm256_avx512_setzero_si256 (),
2066 (__mmask16) __U);
2067 }
2068
2069 extern __inline __m128i
2070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2071 _mm_mask_slli_epi16 (__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
2072 {
2073 return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
2074 (__v8hi) __W,
2075 (__mmask8) __U);
2076 }
2077
2078 extern __inline __m128i
2079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2080 _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
2081 {
2082 return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
2083 (__v8hi)
2084 _mm_avx512_setzero_si128 (),
2085 (__mmask8) __U);
2086 }
2087
2088 #else
/* Merge-masking 256-bit alignr, macro form.  X and Y are the two source
   vectors, N is the byte count (multiplied by 8 for the builtin), U is
   the mask.  W goes in the builtin's pass-through slot, the same slot
   where _mm256_maskz_alignr_epi8 places the zero vector; passing X
   there would leave W unused and make masked-off bytes come from X.  */
#define _mm256_mask_alignr_epi8(W, U, X, Y, N) \
  ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \
                                             (__v4di)(__m256i)(Y), \
                                             (int)((N) * 8), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask32)(U)))
2093
/* Macro forms of the masked 16-bit srli intrinsics.  Each expands to the
   matching __builtin_ia32_psrlwi*_mask call with IMM cast to int; the
   "mask" forms pass W as the pass-through vector, the "maskz" forms an
   all-zero vector.  */
#define _mm256_mask_srli_epi16(W, U, A, IMM) \
  ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A), \
                                            (int)(IMM), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)))

#define _mm256_maskz_srli_epi16(U, A, IMM) \
  ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A), \
                                            (int)(IMM), \
                                            (__v16hi)_mm256_avx512_setzero_si256 (), \
                                            (__mmask16)(U)))

#define _mm_mask_srli_epi16(W, U, A, IMM) \
  ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A), \
                                            (int)(IMM), \
                                            (__v8hi)(__m128i)(W), \
                                            (__mmask8)(U)))

#define _mm_maskz_srli_epi16(U, A, IMM) \
  ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A), \
                                            (int)(IMM), \
                                            (__v8hi)_mm_avx512_setzero_si128 (), \
                                            (__mmask8)(U)))
2109
/* Macro forms of the masked 16-bit srai intrinsics.  Each expands to the
   matching __builtin_ia32_psrawi*_mask call with IMM cast to unsigned
   int; the "mask" forms pass W as the pass-through vector, the "maskz"
   forms an all-zero vector.  */
#define _mm256_mask_srai_epi16(W, U, A, IMM) \
  ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A), \
                                            (unsigned int)(IMM), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)))

#define _mm256_maskz_srai_epi16(U, A, IMM) \
  ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A), \
                                            (unsigned int)(IMM), \
                                            (__v16hi)_mm256_avx512_setzero_si256 (), \
                                            (__mmask16)(U)))

#define _mm_mask_srai_epi16(W, U, A, IMM) \
  ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A), \
                                            (unsigned int)(IMM), \
                                            (__v8hi)(__m128i)(W), \
                                            (__mmask8)(U)))

#define _mm_maskz_srai_epi16(U, A, IMM) \
  ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A), \
                                            (unsigned int)(IMM), \
                                            (__v8hi)_mm_avx512_setzero_si128 (), \
                                            (__mmask8)(U)))
2125
/* Macro forms of the masked shufflehi intrinsics.  Each expands to the
   matching __builtin_ia32_pshufhw*_mask call with IMM cast to int; the
   "mask" forms pass W as the pass-through vector, the "maskz" forms an
   all-zero vector.  */
#define _mm256_mask_shufflehi_epi16(W, U, A, IMM) \
  ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), \
                                             (int)(IMM), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))

#define _mm256_maskz_shufflehi_epi16(U, A, IMM) \
  ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), \
                                             (int)(IMM), \
                                             (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), \
                                             (__mmask16)(U)))

#define _mm_mask_shufflehi_epi16(W, U, A, IMM) \
  ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), \
                                             (int)(IMM), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm_maskz_shufflehi_epi16(U, A, IMM) \
  ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), \
                                             (int)(IMM), \
                                             (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), \
                                             (__mmask8)(U)))
2145
/* Macro forms of the masked shufflelo intrinsics.  Each expands to the
   matching __builtin_ia32_pshuflw*_mask call with IMM cast to int; the
   "mask" forms pass W as the pass-through vector, the "maskz" forms an
   all-zero vector.  */
#define _mm256_mask_shufflelo_epi16(W, U, A, IMM) \
  ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A), \
                                             (int)(IMM), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))

#define _mm256_maskz_shufflelo_epi16(U, A, IMM) \
  ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A), \
                                             (int)(IMM), \
                                             (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), \
                                             (__mmask16)(U)))

#define _mm_mask_shufflelo_epi16(W, U, A, IMM) \
  ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), \
                                             (int)(IMM), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm_maskz_shufflelo_epi16(U, A, IMM) \
  ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), \
                                             (int)(IMM), \
                                             (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), \
                                             (__mmask8)(U)))
2165
/* Zero-masking 256-bit alignr, macro form: X and Y are the sources, the
   byte count N is multiplied by 8 for the builtin, and an all-zero
   vector is supplied as the pass-through operand.  */
#define _mm256_maskz_alignr_epi8(U, X, Y, N) \
  ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \
                                             (__v4di)(__m256i)(Y), \
                                             (int)((N) * 8), \
                                             (__v4di)(__m256i)_mm256_avx512_setzero_si256 (), \
                                             (__mmask32)(U)))
2171
/* Merge-masking 128-bit alignr, macro form.  X and Y are the two source
   vectors, N is the byte count (multiplied by 8 for the builtin), U is
   the mask.  W goes in the builtin's pass-through slot, the same slot
   where _mm_maskz_alignr_epi8 places the zero vector; passing X there
   would leave W unused and make masked-off bytes come from X.  */
#define _mm_mask_alignr_epi8(W, U, X, Y, N) \
  ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \
                                             (__v2di)(__m128i)(Y), \
                                             (int)((N) * 8), \
                                             (__v2di)(__m128i)(W), \
                                             (__mmask16)(U)))
2176
/* Zero-masking 128-bit alignr, macro form: X and Y are the sources, the
   byte count N is multiplied by 8 for the builtin, and an all-zero
   vector is supplied as the pass-through operand.  */
#define _mm_maskz_alignr_epi8(U, X, Y, N) \
  ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \
                                             (__v2di)(__m128i)(Y), \
                                             (int)((N) * 8), \
                                             (__v2di)(__m128i)_mm_avx512_setzero_si128 (), \
                                             (__mmask16)(U)))
2182
/* Macro forms of the 128-bit masked slli intrinsics.  CNT is cast to
   unsigned int and handed to __builtin_ia32_psllwi128_mask; the "mask"
   form passes W as the pass-through vector, the "maskz" form an
   all-zero vector.  */
#define _mm_mask_slli_epi16(W, U, X, CNT) \
  ((__m128i) __builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), \
                                            (unsigned int)(CNT), \
                                            (__v8hi)(__m128i)(W), \
                                            (__mmask8)(U)))

#define _mm_maskz_slli_epi16(U, X, CNT) \
  ((__m128i) __builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), \
                                            (unsigned int)(CNT), \
                                            (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), \
                                            (__mmask8)(U)))
2194
/* Unmasked 256-bit dbsad, macro form: calls the masked builtin with an
   all-zero pass-through vector and an all-ones mask.  */
#define _mm256_dbsad_epu8(X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i)(X), \
                                              (__v32qi)(__m256i)(Y), \
                                              (int)(C), \
                                              (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), \
                                              (__mmask16)-1))
2200
/* Macro forms of the 256-bit masked slli intrinsics.  CNT is cast to
   unsigned int and handed to __builtin_ia32_psllwi256_mask; the "mask"
   form passes W as the pass-through vector, the "maskz" form an
   all-zero vector.  */
#define _mm256_mask_slli_epi16(W, U, X, CNT) \
  ((__m256i) __builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), \
                                            (unsigned int)(CNT), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)))

#define _mm256_maskz_slli_epi16(U, X, CNT) \
  ((__m256i) __builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), \
                                            (unsigned int)(CNT), \
                                            (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), \
                                            (__mmask16)(U)))
2212
/* Masked 256-bit dbsad, macro forms.  C is cast to int for the builtin;
   the "mask" form passes W as the pass-through vector, the "maskz" form
   an all-zero vector.  */
#define _mm256_mask_dbsad_epu8(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i)(X), \
                                              (__v32qi)(__m256i)(Y), \
                                              (int)(C), \
                                              (__v16hi)(__m256i)(W), \
                                              (__mmask16)(U)))

#define _mm256_maskz_dbsad_epu8(U, X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i)(X), \
                                              (__v32qi)(__m256i)(Y), \
                                              (int)(C), \
                                              (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), \
                                              (__mmask16)(U)))
2224
/* 128-bit dbsad, macro forms, all built on
   __builtin_ia32_dbpsadbw128_mask with C cast to int.  The unmasked
   form uses an all-zero pass-through vector and an all-ones mask; the
   "mask" form passes W and U; the "maskz" form passes an all-zero
   vector and U.  */
#define _mm_dbsad_epu8(X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i)(X), \
                                              (__v16qi)(__m128i)(Y), \
                                              (int)(C), \
                                              (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), \
                                              (__mmask8)-1))

#define _mm_mask_dbsad_epu8(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i)(X), \
                                              (__v16qi)(__m128i)(Y), \
                                              (int)(C), \
                                              (__v8hi)(__m128i)(W), \
                                              (__mmask8)(U)))

#define _mm_maskz_dbsad_epu8(U, X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i)(X), \
                                              (__v16qi)(__m128i)(Y), \
                                              (int)(C), \
                                              (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), \
                                              (__mmask8)(U)))
2242
/* Unmasked compare-into-mask macros.  P is cast to int and forwarded as
   the predicate operand; an all-ones mask is supplied as the builtin's
   mask operand.  The epi* forms use the cmp{b,w} builtins, the epu*
   forms the ucmp{b,w} builtins.  */
#define _mm_cmp_epi16_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
                                           (__v8hi)(__m128i)(Y), \
                                           (int)(P), \
                                           (__mmask8)(-1)))

#define _mm_cmp_epi8_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X), \
                                            (__v16qi)(__m128i)(Y), \
                                            (int)(P), \
                                            (__mmask16)(-1)))

#define _mm256_cmp_epi16_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi)(__m256i)(X), \
                                            (__v16hi)(__m256i)(Y), \
                                            (int)(P), \
                                            (__mmask16)(-1)))

#define _mm256_cmp_epi8_mask(X, Y, P) \
  ((__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi)(__m256i)(X), \
                                            (__v32qi)(__m256i)(Y), \
                                            (int)(P), \
                                            (__mmask32)(-1)))

#define _mm_cmp_epu16_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi)(__m128i)(X), \
                                            (__v8hi)(__m128i)(Y), \
                                            (int)(P), \
                                            (__mmask8)(-1)))

#define _mm_cmp_epu8_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi)(__m128i)(X), \
                                             (__v16qi)(__m128i)(Y), \
                                             (int)(P), \
                                             (__mmask16)(-1)))

#define _mm256_cmp_epu16_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi)(__m256i)(X), \
                                             (__v16hi)(__m256i)(Y), \
                                             (int)(P), \
                                             (__mmask16)(-1)))

#define _mm256_cmp_epu8_mask(X, Y, P) \
  ((__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi)(__m256i)(X), \
                                             (__v32qi)(__m256i)(Y), \
                                             (int)(P), \
                                             (__mmask32)(-1)))
2282
/* Masked compare-into-mask macros.  P is cast to int and forwarded as
   the predicate operand; the caller's mask M is supplied as the
   builtin's mask operand.  The epi* forms use the cmp{b,w} builtins,
   the epu* forms the ucmp{b,w} builtins.  */
#define _mm_mask_cmp_epi16_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
                                           (__v8hi)(__m128i)(Y), \
                                           (int)(P), \
                                           (__mmask8)(M)))

#define _mm_mask_cmp_epi8_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X), \
                                            (__v16qi)(__m128i)(Y), \
                                            (int)(P), \
                                            (__mmask16)(M)))

#define _mm256_mask_cmp_epi16_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi)(__m256i)(X), \
                                            (__v16hi)(__m256i)(Y), \
                                            (int)(P), \
                                            (__mmask16)(M)))

#define _mm256_mask_cmp_epi8_mask(M, X, Y, P) \
  ((__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi)(__m256i)(X), \
                                            (__v32qi)(__m256i)(Y), \
                                            (int)(P), \
                                            (__mmask32)(M)))

#define _mm_mask_cmp_epu16_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi)(__m128i)(X), \
                                            (__v8hi)(__m128i)(Y), \
                                            (int)(P), \
                                            (__mmask8)(M)))

#define _mm_mask_cmp_epu8_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi)(__m128i)(X), \
                                             (__v16qi)(__m128i)(Y), \
                                             (int)(P), \
                                             (__mmask16)(M)))

#define _mm256_mask_cmp_epu16_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi)(__m256i)(X), \
                                             (__v16hi)(__m256i)(Y), \
                                             (int)(P), \
                                             (__mmask16)(M)))

#define _mm256_mask_cmp_epu8_mask(M, X, Y, P) \
  ((__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi)(__m256i)(X), \
                                             (__v32qi)(__m256i)(Y), \
                                             (int)(P), \
                                             (__mmask32)(M)))
2322 #endif
2323
2324 extern __inline __mmask32
2325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2326 _mm256_cmpneq_epi8_mask (__m256i __X, __m256i __Y)
2327 {
2328 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2329 (__v32qi) __Y, 4,
2330 (__mmask32) -1);
2331 }
2332
2333 extern __inline __mmask32
2334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2335 _mm256_cmplt_epi8_mask (__m256i __X, __m256i __Y)
2336 {
2337 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2338 (__v32qi) __Y, 1,
2339 (__mmask32) -1);
2340 }
2341
2342 extern __inline __mmask32
2343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2344 _mm256_cmpge_epi8_mask (__m256i __X, __m256i __Y)
2345 {
2346 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2347 (__v32qi) __Y, 5,
2348 (__mmask32) -1);
2349 }
2350
2351 extern __inline __mmask32
2352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2353 _mm256_cmple_epi8_mask (__m256i __X, __m256i __Y)
2354 {
2355 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2356 (__v32qi) __Y, 2,
2357 (__mmask32) -1);
2358 }
2359
2360 extern __inline __mmask16
2361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2362 _mm256_cmpneq_epi16_mask (__m256i __X, __m256i __Y)
2363 {
2364 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2365 (__v16hi) __Y, 4,
2366 (__mmask16) -1);
2367 }
2368
2369 extern __inline __mmask16
2370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2371 _mm256_cmplt_epi16_mask (__m256i __X, __m256i __Y)
2372 {
2373 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2374 (__v16hi) __Y, 1,
2375 (__mmask16) -1);
2376 }
2377
2378 extern __inline __mmask16
2379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2380 _mm256_cmpge_epi16_mask (__m256i __X, __m256i __Y)
2381 {
2382 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2383 (__v16hi) __Y, 5,
2384 (__mmask16) -1);
2385 }
2386
2387 extern __inline __mmask16
2388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389 _mm256_cmple_epi16_mask (__m256i __X, __m256i __Y)
2390 {
2391 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2392 (__v16hi) __Y, 2,
2393 (__mmask16) -1);
2394 }
2395
2396 extern __inline __mmask16
2397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2398 _mm_cmpneq_epu8_mask (__m128i __X, __m128i __Y)
2399 {
2400 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2401 (__v16qi) __Y, 4,
2402 (__mmask16) -1);
2403 }
2404
2405 extern __inline __mmask16
2406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2407 _mm_cmplt_epu8_mask (__m128i __X, __m128i __Y)
2408 {
2409 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2410 (__v16qi) __Y, 1,
2411 (__mmask16) -1);
2412 }
2413
2414 extern __inline __mmask16
2415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2416 _mm_cmpge_epu8_mask (__m128i __X, __m128i __Y)
2417 {
2418 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2419 (__v16qi) __Y, 5,
2420 (__mmask16) -1);
2421 }
2422
2423 extern __inline __mmask16
2424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425 _mm_cmple_epu8_mask (__m128i __X, __m128i __Y)
2426 {
2427 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2428 (__v16qi) __Y, 2,
2429 (__mmask16) -1);
2430 }
2431
2432 extern __inline __mmask8
2433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2434 _mm_cmpneq_epu16_mask (__m128i __X, __m128i __Y)
2435 {
2436 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2437 (__v8hi) __Y, 4,
2438 (__mmask8) -1);
2439 }
2440
2441 extern __inline __mmask8
2442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2443 _mm_cmplt_epu16_mask (__m128i __X, __m128i __Y)
2444 {
2445 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2446 (__v8hi) __Y, 1,
2447 (__mmask8) -1);
2448 }
2449
2450 extern __inline __mmask8
2451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452 _mm_cmpge_epu16_mask (__m128i __X, __m128i __Y)
2453 {
2454 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2455 (__v8hi) __Y, 5,
2456 (__mmask8) -1);
2457 }
2458
2459 extern __inline __mmask8
2460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461 _mm_cmple_epu16_mask (__m128i __X, __m128i __Y)
2462 {
2463 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2464 (__v8hi) __Y, 2,
2465 (__mmask8) -1);
2466 }
2467
2468 extern __inline __mmask16
2469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470 _mm_cmpneq_epi8_mask (__m128i __X, __m128i __Y)
2471 {
2472 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2473 (__v16qi) __Y, 4,
2474 (__mmask16) -1);
2475 }
2476
2477 extern __inline __mmask16
2478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2479 _mm_cmplt_epi8_mask (__m128i __X, __m128i __Y)
2480 {
2481 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2482 (__v16qi) __Y, 1,
2483 (__mmask16) -1);
2484 }
2485
2486 extern __inline __mmask16
2487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2488 _mm_cmpge_epi8_mask (__m128i __X, __m128i __Y)
2489 {
2490 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2491 (__v16qi) __Y, 5,
2492 (__mmask16) -1);
2493 }
2494
2495 extern __inline __mmask16
2496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2497 _mm_cmple_epi8_mask (__m128i __X, __m128i __Y)
2498 {
2499 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2500 (__v16qi) __Y, 2,
2501 (__mmask16) -1);
2502 }
2503
2504 extern __inline __mmask8
2505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506 _mm_cmpneq_epi16_mask (__m128i __X, __m128i __Y)
2507 {
2508 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2509 (__v8hi) __Y, 4,
2510 (__mmask8) -1);
2511 }
2512
2513 extern __inline __mmask8
2514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2515 _mm_cmplt_epi16_mask (__m128i __X, __m128i __Y)
2516 {
2517 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2518 (__v8hi) __Y, 1,
2519 (__mmask8) -1);
2520 }
2521
2522 extern __inline __mmask8
2523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2524 _mm_cmpge_epi16_mask (__m128i __X, __m128i __Y)
2525 {
2526 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2527 (__v8hi) __Y, 5,
2528 (__mmask8) -1);
2529 }
2530
2531 extern __inline __mmask8
2532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533 _mm_cmple_epi16_mask (__m128i __X, __m128i __Y)
2534 {
2535 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2536 (__v8hi) __Y, 2,
2537 (__mmask8) -1);
2538 }
2539
2540 extern __inline __m256i
2541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542 _mm256_mask_mulhrs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
2543 __m256i __Y)
2544 {
2545 return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
2546 (__v16hi) __Y,
2547 (__v16hi) __W,
2548 (__mmask16) __U);
2549 }
2550
2551 extern __inline __m256i
2552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2553 _mm256_maskz_mulhrs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
2554 {
2555 return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
2556 (__v16hi) __Y,
2557 (__v16hi)
2558 _mm256_avx512_setzero_si256 (),
2559 (__mmask16) __U);
2560 }
2561
2562 extern __inline __m256i
2563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2564 _mm256_mask_mulhi_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2565 __m256i __B)
2566 {
2567 return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
2568 (__v16hi) __B,
2569 (__v16hi) __W,
2570 (__mmask16) __U);
2571 }
2572
2573 extern __inline __m256i
2574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2575 _mm256_maskz_mulhi_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2576 {
2577 return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
2578 (__v16hi) __B,
2579 (__v16hi)
2580 _mm256_avx512_setzero_si256 (),
2581 (__mmask16) __U);
2582 }
2583
2584 extern __inline __m256i
2585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586 _mm256_mask_mulhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2587 __m256i __B)
2588 {
2589 return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
2590 (__v16hi) __B,
2591 (__v16hi) __W,
2592 (__mmask16) __U);
2593 }
2594
2595 extern __inline __m256i
2596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2597 _mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2598 {
2599 return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
2600 (__v16hi) __B,
2601 (__v16hi)
2602 _mm256_avx512_setzero_si256 (),
2603 (__mmask16) __U);
2604 }
2605
2606 extern __inline __m128i
2607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608 _mm_mask_mulhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2609 __m128i __B)
2610 {
2611 return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
2612 (__v8hi) __B,
2613 (__v8hi) __W,
2614 (__mmask8) __U);
2615 }
2616
2617 extern __inline __m128i
2618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2619 _mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2620 {
2621 return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
2622 (__v8hi) __B,
2623 (__v8hi)
2624 _mm_avx512_setzero_si128 (),
2625 (__mmask8) __U);
2626 }
2627
2628 extern __inline __m128i
2629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2630 _mm_mask_mulhi_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
2631 __m128i __B)
2632 {
2633 return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
2634 (__v8hi) __B,
2635 (__v8hi) __W,
2636 (__mmask8) __U);
2637 }
2638
2639 extern __inline __m128i
2640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2641 _mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
2642 {
2643 return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
2644 (__v8hi) __B,
2645 (__v8hi)
2646 _mm_avx512_setzero_si128 (),
2647 (__mmask8) __U);
2648 }
2649
2650 extern __inline __m128i
2651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2652 _mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
2653 __m128i __Y)
2654 {
2655 return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
2656 (__v8hi) __Y,
2657 (__v8hi) __W,
2658 (__mmask8) __U);
2659 }
2660
2661 extern __inline __m128i
2662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2663 _mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
2664 {
2665 return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
2666 (__v8hi) __Y,
2667 (__v8hi)
2668 _mm_avx512_setzero_si128 (),
2669 (__mmask8) __U);
2670 }
2671
2672 extern __inline __m256i
2673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2674 _mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2675 __m256i __B)
2676 {
2677 return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
2678 (__v16hi) __B,
2679 (__v16hi) __W,
2680 (__mmask16) __U);
2681 }
2682
2683 extern __inline __m256i
2684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2685 _mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2686 {
2687 return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
2688 (__v16hi) __B,
2689 (__v16hi)
2690 _mm256_avx512_setzero_si256 (),
2691 (__mmask16) __U);
2692 }
2693
2694 extern __inline __m128i
2695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2696 _mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2697 __m128i __B)
2698 {
2699 return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
2700 (__v8hi) __B,
2701 (__v8hi) __W,
2702 (__mmask8) __U);
2703 }
2704
2705 extern __inline __m128i
2706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707 _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2708 {
2709 return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
2710 (__v8hi) __B,
2711 (__v8hi)
2712 _mm_avx512_setzero_si128 (),
2713 (__mmask8) __U);
2714 }
2715
2716 extern __inline __m256i
2717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2718 _mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
2719 {
2720 return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
2721 (__v16hi) __W,
2722 (__mmask16) __U);
2723 }
2724
2725 extern __inline __m256i
2726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2727 _mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
2728 {
2729 return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
2730 (__v16hi)
2731 _mm256_avx512_setzero_si256 (),
2732 (__mmask16) __U);
2733 }
2734
2735 extern __inline __m128i
2736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2737 _mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
2738 {
2739 return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
2740 (__v8hi) __W,
2741 (__mmask8) __U);
2742 }
2743
2744 extern __inline __m128i
2745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2746 _mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
2747 {
2748 return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
2749 (__v8hi)
2750 _mm_avx512_setzero_si128 (),
2751 (__mmask8) __U);
2752 }
2753
2754 extern __inline __m256i
2755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2756 _mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
2757 {
2758 return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
2759 (__v16hi) __W,
2760 (__mmask16) __U);
2761 }
2762
2763 extern __inline __m256i
2764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2765 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
2766 {
2767 return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
2768 (__v16hi)
2769 _mm256_avx512_setzero_si256 (),
2770 (__mmask16) __U);
2771 }
2772
2773 extern __inline __m128i
2774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2775 _mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
2776 {
2777 return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
2778 (__v8hi) __W,
2779 (__mmask8) __U);
2780 }
2781
2782 extern __inline __m128i
2783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2784 _mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A)
2785 {
2786 return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
2787 (__v8hi)
2788 _mm_avx512_setzero_si128 (),
2789 (__mmask8) __U);
2790 }
2791
2792 extern __inline __m256i
2793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2794 _mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2795 __m256i __B)
2796 {
2797 return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
2798 (__v32qi) __B,
2799 (__v32qi) __W,
2800 (__mmask32) __U);
2801 }
2802
2803 extern __inline __m256i
2804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2805 _mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2806 {
2807 return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
2808 (__v32qi) __B,
2809 (__v32qi)
2810 _mm256_avx512_setzero_si256 (),
2811 (__mmask32) __U);
2812 }
2813
2814 extern __inline __m128i
2815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816 _mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
2817 __m128i __B)
2818 {
2819 return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
2820 (__v16qi) __B,
2821 (__v16qi) __W,
2822 (__mmask16) __U);
2823 }
2824
2825 extern __inline __m128i
2826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2827 _mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
2828 {
2829 return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
2830 (__v16qi) __B,
2831 (__v16qi)
2832 _mm_avx512_setzero_si128 (),
2833 (__mmask16) __U);
2834 }
2835
2836 extern __inline __m256i
2837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2838 _mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2839 __m256i __B)
2840 {
2841 return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
2842 (__v16hi) __B,
2843 (__v16hi) __W,
2844 (__mmask16) __U);
2845 }
2846
2847 extern __inline __m256i
2848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2849 _mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2850 {
2851 return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
2852 (__v16hi) __B,
2853 (__v16hi)
2854 _mm256_avx512_setzero_si256 (),
2855 (__mmask16) __U);
2856 }
2857
2858 extern __inline __m128i
2859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2860 _mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
2861 __m128i __B)
2862 {
2863 return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
2864 (__v8hi) __B,
2865 (__v8hi) __W,
2866 (__mmask8) __U);
2867 }
2868
2869 extern __inline __m128i
2870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2871 _mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
2872 {
2873 return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
2874 (__v8hi) __B,
2875 (__v8hi)
2876 _mm_avx512_setzero_si128 (),
2877 (__mmask8) __U);
2878 }
2879
2880 extern __inline __m256i
2881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2882 _mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2883 __m256i __B)
2884 {
2885 return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
2886 (__v32qi) __B,
2887 (__v32qi) __W,
2888 (__mmask32) __U);
2889 }
2890
2891 extern __inline __m256i
2892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2893 _mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2894 {
2895 return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
2896 (__v32qi) __B,
2897 (__v32qi)
2898 _mm256_avx512_setzero_si256 (),
2899 (__mmask32) __U);
2900 }
2901
2902 extern __inline __m256i
2903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2904 _mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2905 __m256i __B)
2906 {
2907 return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
2908 (__v16hi) __B,
2909 (__v16hi) __W,
2910 (__mmask16) __U);
2911 }
2912
2913 extern __inline __m256i
2914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2915 _mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2916 {
2917 return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
2918 (__v16hi) __B,
2919 (__v16hi)
2920 _mm256_avx512_setzero_si256 (),
2921 (__mmask16) __U);
2922 }
2923
2924 extern __inline __m256i
2925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2926 _mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2927 __m256i __B)
2928 {
2929 return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
2930 (__v32qi) __B,
2931 (__v32qi) __W,
2932 (__mmask32) __U);
2933 }
2934
2935 extern __inline __m256i
2936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2937 _mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2938 {
2939 return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
2940 (__v32qi) __B,
2941 (__v32qi)
2942 _mm256_avx512_setzero_si256 (),
2943 (__mmask32) __U);
2944 }
2945
2946 extern __inline __m256i
2947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2948 _mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2949 __m256i __B)
2950 {
2951 return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
2952 (__v16hi) __B,
2953 (__v16hi) __W,
2954 (__mmask16) __U);
2955 }
2956
2957 extern __inline __m256i
2958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2959 _mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2960 {
2961 return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
2962 (__v16hi) __B,
2963 (__v16hi)
2964 _mm256_avx512_setzero_si256 (),
2965 (__mmask16) __U);
2966 }
2967
2968 extern __inline __m256i
2969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2970 _mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2971 __m256i __B)
2972 {
2973 return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
2974 (__v32qi) __B,
2975 (__v32qi) __W,
2976 (__mmask32) __U);
2977 }
2978
2979 extern __inline __m256i
2980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2981 _mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2982 {
2983 return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
2984 (__v32qi) __B,
2985 (__v32qi)
2986 _mm256_avx512_setzero_si256 (),
2987 (__mmask32) __U);
2988 }
2989
2990 extern __inline __m256i
2991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992 _mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2993 __m256i __B)
2994 {
2995 return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
2996 (__v16hi) __B,
2997 (__v16hi) __W,
2998 (__mmask16) __U);
2999 }
3000
3001 extern __inline __m256i
3002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003 _mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
3004 {
3005 return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
3006 (__v16hi) __B,
3007 (__v16hi)
3008 _mm256_avx512_setzero_si256 (),
3009 (__mmask16) __U);
3010 }
3011
3012 extern __inline __m256i
3013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3014 _mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3015 __m256i __B)
3016 {
3017 return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
3018 (__v32qi) __B,
3019 (__v32qi) __W,
3020 (__mmask32) __U);
3021 }
3022
3023 extern __inline __m256i
3024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3025 _mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3026 {
3027 return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
3028 (__v32qi) __B,
3029 (__v32qi)
3030 _mm256_avx512_setzero_si256 (),
3031 (__mmask32) __U);
3032 }
3033
3034 extern __inline __m256i
3035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3036 _mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3037 __m256i __B)
3038 {
3039 return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
3040 (__v16hi) __B,
3041 (__v16hi) __W,
3042 (__mmask16) __U);
3043 }
3044
3045 extern __inline __m256i
3046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3047 _mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3048 {
3049 return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
3050 (__v16hi) __B,
3051 (__v16hi)
3052 _mm256_avx512_setzero_si256 (),
3053 (__mmask16) __U);
3054 }
3055
3056 extern __inline __m256i
3057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3058 _mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3059 __m256i __B)
3060 {
3061 return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
3062 (__v32qi) __B,
3063 (__v32qi) __W,
3064 (__mmask32) __U);
3065 }
3066
3067 extern __inline __m256i
3068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3069 _mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3070 {
3071 return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
3072 (__v32qi) __B,
3073 (__v32qi)
3074 _mm256_avx512_setzero_si256 (),
3075 (__mmask32) __U);
3076 }
3077
3078 extern __inline __m256i
3079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3080 _mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3081 __m256i __B)
3082 {
3083 return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
3084 (__v16hi) __B,
3085 (__v16hi) __W,
3086 (__mmask16) __U);
3087 }
3088
3089 extern __inline __m256i
3090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3091 _mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3092 {
3093 return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
3094 (__v16hi) __B,
3095 (__v16hi)
3096 _mm256_avx512_setzero_si256 (),
3097 (__mmask16) __U);
3098 }
3099
3100 extern __inline __m256i
3101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3102 _mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
3103 __m256i __B)
3104 {
3105 return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
3106 (__v32qi) __B,
3107 (__v32qi) __W,
3108 (__mmask32) __U);
3109 }
3110
3111 extern __inline __m256i
3112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3113 _mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
3114 {
3115 return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
3116 (__v32qi) __B,
3117 (__v32qi)
3118 _mm256_avx512_setzero_si256 (),
3119 (__mmask32) __U);
3120 }
3121
3122 extern __inline __m256i
3123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3124 _mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
3125 __m256i __B)
3126 {
3127 return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
3128 (__v16hi) __B,
3129 (__v16hi) __W,
3130 (__mmask16) __U);
3131 }
3132
3133 extern __inline __m256i
3134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3135 _mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
3136 {
3137 return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
3138 (__v16hi) __B,
3139 (__v16hi)
3140 _mm256_avx512_setzero_si256 (),
3141 (__mmask16) __U);
3142 }
3143
3144 extern __inline __m128i
3145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3146 _mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3147 __m128i __B)
3148 {
3149 return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
3150 (__v16qi) __B,
3151 (__v16qi) __W,
3152 (__mmask16) __U);
3153 }
3154
3155 extern __inline __m128i
3156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3157 _mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3158 {
3159 return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
3160 (__v16qi) __B,
3161 (__v16qi)
3162 _mm_avx512_setzero_si128 (),
3163 (__mmask16) __U);
3164 }
3165
3166 extern __inline __m128i
3167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3168 _mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3169 __m128i __B)
3170 {
3171 return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
3172 (__v8hi) __B,
3173 (__v8hi) __W,
3174 (__mmask8) __U);
3175 }
3176
3177 extern __inline __m128i
3178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3179 _mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3180 {
3181 return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
3182 (__v8hi) __B,
3183 (__v8hi)
3184 _mm_avx512_setzero_si128 (),
3185 (__mmask8) __U);
3186 }
3187
3188 extern __inline __m256i
3189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3190 _mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3191 __m256i __B)
3192 {
3193 return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
3194 (__v32qi) __B,
3195 (__v32qi) __W,
3196 (__mmask32) __U);
3197 }
3198
3199 extern __inline __m256i
3200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3201 _mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3202 {
3203 return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
3204 (__v32qi) __B,
3205 (__v32qi)
3206 _mm256_avx512_setzero_si256 (),
3207 (__mmask32) __U);
3208 }
3209
3210 extern __inline __m128i
3211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3212 _mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3213 __m128i __B)
3214 {
3215 return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
3216 (__v16qi) __B,
3217 (__v16qi) __W,
3218 (__mmask16) __U);
3219 }
3220
3221 extern __inline __m128i
3222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223 _mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3224 {
3225 return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
3226 (__v16qi) __B,
3227 (__v16qi)
3228 _mm_avx512_setzero_si128 (),
3229 (__mmask16) __U);
3230 }
3231
3232 extern __inline __m256i
3233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3234 _mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3235 __m256i __B)
3236 {
3237 return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
3238 (__v16hi) __B,
3239 (__v16hi) __W,
3240 (__mmask16) __U);
3241 }
3242
3243 extern __inline __m256i
3244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245 _mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3246 {
3247 return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
3248 (__v16hi) __B,
3249 (__v16hi)
3250 _mm256_avx512_setzero_si256 (),
3251 (__mmask16) __U);
3252 }
3253
3254 extern __inline __m128i
3255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3256 _mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3257 __m128i __B)
3258 {
3259 return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
3260 (__v8hi) __B,
3261 (__v8hi) __W,
3262 (__mmask8) __U);
3263 }
3264
3265 extern __inline __m128i
3266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3267 _mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3268 {
3269 return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
3270 (__v8hi) __B,
3271 (__v8hi)
3272 _mm_avx512_setzero_si128 (),
3273 (__mmask8) __U);
3274 }
3275
3276 extern __inline __m256i
3277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3278 _mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3279 __m256i __B)
3280 {
3281 return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
3282 (__v32qi) __B,
3283 (__v32qi) __W,
3284 (__mmask32) __U);
3285 }
3286
3287 extern __inline __m256i
3288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3289 _mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3290 {
3291 return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
3292 (__v32qi) __B,
3293 (__v32qi)
3294 _mm256_avx512_setzero_si256 (),
3295 (__mmask32) __U);
3296 }
3297
3298 extern __inline __m128i
3299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3300 _mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3301 __m128i __B)
3302 {
3303 return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
3304 (__v16qi) __B,
3305 (__v16qi) __W,
3306 (__mmask16) __U);
3307 }
3308
3309 extern __inline __m128i
3310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3311 _mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3312 {
3313 return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
3314 (__v16qi) __B,
3315 (__v16qi)
3316 _mm_avx512_setzero_si128 (),
3317 (__mmask16) __U);
3318 }
3319
3320 extern __inline __m256i
3321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3322 _mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3323 __m256i __B)
3324 {
3325 return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
3326 (__v16hi) __B,
3327 (__v16hi) __W,
3328 (__mmask16) __U);
3329 }
3330
3331 extern __inline __m256i
3332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3333 _mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3334 {
3335 return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
3336 (__v16hi) __B,
3337 (__v16hi)
3338 _mm256_avx512_setzero_si256 (),
3339 (__mmask16) __U);
3340 }
3341
3342 extern __inline __m128i
3343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3344 _mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3345 __m128i __B)
3346 {
3347 return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
3348 (__v8hi) __B,
3349 (__v8hi) __W,
3350 (__mmask8) __U);
3351 }
3352
3353 extern __inline __m128i
3354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3355 _mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3356 {
3357 return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
3358 (__v8hi) __B,
3359 (__v8hi)
3360 _mm_avx512_setzero_si128 (),
3361 (__mmask8) __U);
3362 }
3363
3364 extern __inline __mmask16
3365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3366 _mm_cmpeq_epi8_mask (__m128i __A, __m128i __B)
3367 {
3368 return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
3369 (__v16qi) __B,
3370 (__mmask16) -1);
3371 }
3372
3373 extern __inline __mmask16
3374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3375 _mm_cmpeq_epu8_mask (__m128i __A, __m128i __B)
3376 {
3377 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3378 (__v16qi) __B, 0,
3379 (__mmask16) -1);
3380 }
3381
3382 extern __inline __mmask16
3383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3384 _mm_mask_cmpeq_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3385 {
3386 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3387 (__v16qi) __B, 0,
3388 __U);
3389 }
3390
3391 extern __inline __mmask16
3392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3393 _mm_mask_cmpeq_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3394 {
3395 return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
3396 (__v16qi) __B,
3397 __U);
3398 }
3399
3400 extern __inline __mmask32
3401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3402 _mm256_cmpeq_epu8_mask (__m256i __A, __m256i __B)
3403 {
3404 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3405 (__v32qi) __B, 0,
3406 (__mmask32) -1);
3407 }
3408
3409 extern __inline __mmask32
3410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3411 _mm256_cmpeq_epi8_mask (__m256i __A, __m256i __B)
3412 {
3413 return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
3414 (__v32qi) __B,
3415 (__mmask32) -1);
3416 }
3417
3418 extern __inline __mmask32
3419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3420 _mm256_mask_cmpeq_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3421 {
3422 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3423 (__v32qi) __B, 0,
3424 __U);
3425 }
3426
3427 extern __inline __mmask32
3428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3429 _mm256_mask_cmpeq_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3430 {
3431 return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
3432 (__v32qi) __B,
3433 __U);
3434 }
3435
3436 extern __inline __mmask8
3437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438 _mm_cmpeq_epu16_mask (__m128i __A, __m128i __B)
3439 {
3440 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3441 (__v8hi) __B, 0,
3442 (__mmask8) -1);
3443 }
3444
3445 extern __inline __mmask8
3446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447 _mm_cmpeq_epi16_mask (__m128i __A, __m128i __B)
3448 {
3449 return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
3450 (__v8hi) __B,
3451 (__mmask8) -1);
3452 }
3453
3454 extern __inline __mmask8
3455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3456 _mm_mask_cmpeq_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3457 {
3458 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3459 (__v8hi) __B, 0, __U);
3460 }
3461
3462 extern __inline __mmask8
3463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3464 _mm_mask_cmpeq_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3465 {
3466 return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
3467 (__v8hi) __B, __U);
3468 }
3469
3470 extern __inline __mmask16
3471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3472 _mm256_cmpeq_epu16_mask (__m256i __A, __m256i __B)
3473 {
3474 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3475 (__v16hi) __B, 0,
3476 (__mmask16) -1);
3477 }
3478
3479 extern __inline __mmask16
3480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3481 _mm256_cmpeq_epi16_mask (__m256i __A, __m256i __B)
3482 {
3483 return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
3484 (__v16hi) __B,
3485 (__mmask16) -1);
3486 }
3487
3488 extern __inline __mmask16
3489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490 _mm256_mask_cmpeq_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3491 {
3492 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3493 (__v16hi) __B, 0,
3494 __U);
3495 }
3496
3497 extern __inline __mmask16
3498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3499 _mm256_mask_cmpeq_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3500 {
3501 return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
3502 (__v16hi) __B,
3503 __U);
3504 }
3505
3506 extern __inline __mmask16
3507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3508 _mm_cmpgt_epu8_mask (__m128i __A, __m128i __B)
3509 {
3510 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3511 (__v16qi) __B, 6,
3512 (__mmask16) -1);
3513 }
3514
3515 extern __inline __mmask16
3516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3517 _mm_cmpgt_epi8_mask (__m128i __A, __m128i __B)
3518 {
3519 return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
3520 (__v16qi) __B,
3521 (__mmask16) -1);
3522 }
3523
3524 extern __inline __mmask16
3525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3526 _mm_mask_cmpgt_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3527 {
3528 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3529 (__v16qi) __B, 6,
3530 __U);
3531 }
3532
3533 extern __inline __mmask16
3534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535 _mm_mask_cmpgt_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3536 {
3537 return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
3538 (__v16qi) __B,
3539 __U);
3540 }
3541
3542 extern __inline __mmask32
3543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3544 _mm256_cmpgt_epu8_mask (__m256i __A, __m256i __B)
3545 {
3546 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3547 (__v32qi) __B, 6,
3548 (__mmask32) -1);
3549 }
3550
3551 extern __inline __mmask32
3552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3553 _mm256_cmpgt_epi8_mask (__m256i __A, __m256i __B)
3554 {
3555 return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
3556 (__v32qi) __B,
3557 (__mmask32) -1);
3558 }
3559
3560 extern __inline __mmask32
3561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3562 _mm256_mask_cmpgt_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3563 {
3564 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3565 (__v32qi) __B, 6,
3566 __U);
3567 }
3568
3569 extern __inline __mmask32
3570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3571 _mm256_mask_cmpgt_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3572 {
3573 return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
3574 (__v32qi) __B,
3575 __U);
3576 }
3577
3578 extern __inline __mmask8
3579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3580 _mm_cmpgt_epu16_mask (__m128i __A, __m128i __B)
3581 {
3582 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3583 (__v8hi) __B, 6,
3584 (__mmask8) -1);
3585 }
3586
3587 extern __inline __mmask8
3588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589 _mm_cmpgt_epi16_mask (__m128i __A, __m128i __B)
3590 {
3591 return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
3592 (__v8hi) __B,
3593 (__mmask8) -1);
3594 }
3595
3596 extern __inline __mmask8
3597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3598 _mm_mask_cmpgt_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3599 {
3600 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3601 (__v8hi) __B, 6, __U);
3602 }
3603
3604 extern __inline __mmask8
3605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3606 _mm_mask_cmpgt_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3607 {
3608 return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
3609 (__v8hi) __B, __U);
3610 }
3611
3612 extern __inline __mmask16
3613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614 _mm256_cmpgt_epu16_mask (__m256i __A, __m256i __B)
3615 {
3616 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3617 (__v16hi) __B, 6,
3618 (__mmask16) -1);
3619 }
3620
3621 extern __inline __mmask16
3622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3623 _mm256_cmpgt_epi16_mask (__m256i __A, __m256i __B)
3624 {
3625 return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
3626 (__v16hi) __B,
3627 (__mmask16) -1);
3628 }
3629
3630 extern __inline __mmask16
3631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632 _mm256_mask_cmpgt_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3633 {
3634 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3635 (__v16hi) __B, 6,
3636 __U);
3637 }
3638
3639 extern __inline __mmask16
3640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641 _mm256_mask_cmpgt_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3642 {
3643 return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
3644 (__v16hi) __B,
3645 __U);
3646 }
3647
3648 extern __inline __mmask16
3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3650 _mm_testn_epi8_mask (__m128i __A, __m128i __B)
3651 {
3652 return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
3653 (__v16qi) __B,
3654 (__mmask16) -1);
3655 }
3656
3657 extern __inline __mmask16
3658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3659 _mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3660 {
3661 return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
3662 (__v16qi) __B, __U);
3663 }
3664
3665 extern __inline __mmask32
3666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3667 _mm256_testn_epi8_mask (__m256i __A, __m256i __B)
3668 {
3669 return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
3670 (__v32qi) __B,
3671 (__mmask32) -1);
3672 }
3673
3674 extern __inline __mmask32
3675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3676 _mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3677 {
3678 return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
3679 (__v32qi) __B, __U);
3680 }
3681
3682 extern __inline __mmask8
3683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3684 _mm_testn_epi16_mask (__m128i __A, __m128i __B)
3685 {
3686 return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
3687 (__v8hi) __B,
3688 (__mmask8) -1);
3689 }
3690
3691 extern __inline __mmask8
3692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3693 _mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3694 {
3695 return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
3696 (__v8hi) __B, __U);
3697 }
3698
3699 extern __inline __mmask16
3700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3701 _mm256_testn_epi16_mask (__m256i __A, __m256i __B)
3702 {
3703 return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
3704 (__v16hi) __B,
3705 (__mmask16) -1);
3706 }
3707
3708 extern __inline __mmask16
3709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710 _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3711 {
3712 return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
3713 (__v16hi) __B, __U);
3714 }
3715
3716 extern __inline __m256i
3717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3718 _mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3719 __m256i __B)
3720 {
3721 return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
3722 (__v32qi) __B,
3723 (__v32qi) __W,
3724 (__mmask32) __U);
3725 }
3726
3727 extern __inline __m256i
3728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729 _mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3730 {
3731 return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
3732 (__v32qi) __B,
3733 (__v32qi)
3734 _mm256_avx512_setzero_si256 (),
3735 (__mmask32) __U);
3736 }
3737
3738 extern __inline __m128i
3739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3740 _mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3741 __m128i __B)
3742 {
3743 return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
3744 (__v16qi) __B,
3745 (__v16qi) __W,
3746 (__mmask16) __U);
3747 }
3748
3749 extern __inline __m128i
3750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3751 _mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3752 {
3753 return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
3754 (__v16qi) __B,
3755 (__v16qi)
3756 _mm_avx512_setzero_si128 (),
3757 (__mmask16) __U);
3758 }
3759
3760 extern __inline __m256i
3761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3762 _mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
3763 {
3764 return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
3765 (__v16hi) __B,
3766 (__v32qi)
3767 _mm256_avx512_setzero_si256 (),
3768 __M);
3769 }
3770
3771 extern __inline __m256i
3772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3773 _mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
3774 __m256i __B)
3775 {
3776 return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
3777 (__v16hi) __B,
3778 (__v32qi) __W,
3779 __M);
3780 }
3781
3782 extern __inline __m128i
3783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3784 _mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
3785 {
3786 return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
3787 (__v8hi) __B,
3788 (__v16qi)
3789 _mm_avx512_setzero_si128 (),
3790 __M);
3791 }
3792
3793 extern __inline __m128i
3794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3795 _mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
3796 __m128i __B)
3797 {
3798 return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
3799 (__v8hi) __B,
3800 (__v16qi) __W,
3801 __M);
3802 }
3803
3804 extern __inline __m256i
3805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3806 _mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
3807 {
3808 return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
3809 (__v16hi) __B,
3810 (__v32qi)
3811 _mm256_avx512_setzero_si256 (),
3812 __M);
3813 }
3814
3815 extern __inline __m256i
3816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3817 _mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
3818 __m256i __B)
3819 {
3820 return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
3821 (__v16hi) __B,
3822 (__v32qi) __W,
3823 __M);
3824 }
3825
3826 extern __inline __m128i
3827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3828 _mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
3829 {
3830 return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
3831 (__v8hi) __B,
3832 (__v16qi)
3833 _mm_avx512_setzero_si128 (),
3834 __M);
3835 }
3836
3837 extern __inline __m128i
3838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3839 _mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
3840 __m128i __B)
3841 {
3842 return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
3843 (__v8hi) __B,
3844 (__v16qi) __W,
3845 __M);
3846 }
3847
3848 extern __inline __m256i
3849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3850 _mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
3851 {
3852 return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
3853 (__v32qi) __W,
3854 (__mmask32) __U);
3855 }
3856
3857 extern __inline __m256i
3858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 _mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
3860 {
3861 return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
3862 (__v32qi)
3863 _mm256_avx512_setzero_si256 (),
3864 (__mmask32) __U);
3865 }
3866
3867 extern __inline __m128i
3868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3869 _mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
3870 {
3871 return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
3872 (__v16qi) __W,
3873 (__mmask16) __U);
3874 }
3875
3876 extern __inline __m128i
3877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3878 _mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A)
3879 {
3880 return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
3881 (__v16qi)
3882 _mm_avx512_setzero_si128 (),
3883 (__mmask16) __U);
3884 }
3885
3886 extern __inline __m256i
3887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3888 _mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
3889 {
3890 return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
3891 (__v16hi) __W,
3892 (__mmask16) __U);
3893 }
3894
3895 extern __inline __m256i
3896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3897 _mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A)
3898 {
3899 return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
3900 (__v16hi)
3901 _mm256_avx512_setzero_si256 (),
3902 (__mmask16) __U);
3903 }
3904
3905 extern __inline __m128i
3906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3907 _mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
3908 {
3909 return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
3910 (__v8hi) __W,
3911 (__mmask8) __U);
3912 }
3913
3914 extern __inline __m128i
3915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3916 _mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A)
3917 {
3918 return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
3919 (__v8hi)
3920 _mm_avx512_setzero_si128 (),
3921 (__mmask8) __U);
3922 }
3923
3924 extern __inline __mmask32
3925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3926 _mm256_cmpneq_epu8_mask (__m256i __X, __m256i __Y)
3927 {
3928 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3929 (__v32qi) __Y, 4,
3930 (__mmask32) -1);
3931 }
3932
3933 extern __inline __mmask32
3934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3935 _mm256_cmplt_epu8_mask (__m256i __X, __m256i __Y)
3936 {
3937 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3938 (__v32qi) __Y, 1,
3939 (__mmask32) -1);
3940 }
3941
3942 extern __inline __mmask32
3943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3944 _mm256_cmpge_epu8_mask (__m256i __X, __m256i __Y)
3945 {
3946 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3947 (__v32qi) __Y, 5,
3948 (__mmask32) -1);
3949 }
3950
3951 extern __inline __mmask32
3952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3953 _mm256_cmple_epu8_mask (__m256i __X, __m256i __Y)
3954 {
3955 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3956 (__v32qi) __Y, 2,
3957 (__mmask32) -1);
3958 }
3959
3960 extern __inline __mmask16
3961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962 _mm256_cmpneq_epu16_mask (__m256i __X, __m256i __Y)
3963 {
3964 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3965 (__v16hi) __Y, 4,
3966 (__mmask16) -1);
3967 }
3968
3969 extern __inline __mmask16
3970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3971 _mm256_cmplt_epu16_mask (__m256i __X, __m256i __Y)
3972 {
3973 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3974 (__v16hi) __Y, 1,
3975 (__mmask16) -1);
3976 }
3977
3978 extern __inline __mmask16
3979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3980 _mm256_cmpge_epu16_mask (__m256i __X, __m256i __Y)
3981 {
3982 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3983 (__v16hi) __Y, 5,
3984 (__mmask16) -1);
3985 }
3986
3987 extern __inline __mmask16
3988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3989 _mm256_cmple_epu16_mask (__m256i __X, __m256i __Y)
3990 {
3991 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3992 (__v16hi) __Y, 2,
3993 (__mmask16) -1);
3994 }
3995
3996 extern __inline void
3997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3998 _mm256_storeu_epi16 (void *__P, __m256i __A)
3999 {
4000 *(__v16hi_u *) __P = (__v16hi_u) __A;
4001 }
4002
4003 extern __inline void
4004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4005 _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
4006 {
4007 __builtin_ia32_storedquhi256_mask ((short *) __P,
4008 (__v16hi) __A,
4009 (__mmask16) __U);
4010 }
4011
4012 extern __inline void
4013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4014 _mm_storeu_epi16 (void *__P, __m128i __A)
4015 {
4016 *(__v8hi_u *) __P = (__v8hi_u) __A;
4017 }
4018
4019 extern __inline void
4020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4021 _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
4022 {
4023 __builtin_ia32_storedquhi128_mask ((short *) __P,
4024 (__v8hi) __A,
4025 (__mmask8) __U);
4026 }
4027
4028 extern __inline __m128i
4029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4030 _mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4031 __m128i __B)
4032 {
4033 return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
4034 (__v8hi) __B,
4035 (__v8hi) __W,
4036 (__mmask8) __U);
4037 }
4038
4039 extern __inline __m128i
4040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4041 _mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
4042 __m128i __B)
4043 {
4044 return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
4045 (__v16qi) __B,
4046 (__v16qi) __W,
4047 (__mmask16) __U);
4048 }
4049
4050 extern __inline __m128i
4051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4052 _mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
4053 {
4054 return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
4055 (__v16qi) __B,
4056 (__v16qi)
4057 _mm_avx512_setzero_si128 (),
4058 (__mmask16) __U);
4059 }
4060
4061 extern __inline __m128i
4062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4063 _mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4064 __m128i __B)
4065 {
4066 return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
4067 (__v8hi) __B,
4068 (__v8hi) __W,
4069 (__mmask8) __U);
4070 }
4071
4072 extern __inline __m128i
4073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4074 _mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4075 {
4076 return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
4077 (__v8hi) __B,
4078 (__v8hi)
4079 _mm_avx512_setzero_si128 (),
4080 (__mmask8) __U);
4081 }
4082
4083 extern __inline __m128i
4084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4085 _mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
4086 __m128i __B)
4087 {
4088 return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
4089 (__v16qi) __B,
4090 (__v16qi) __W,
4091 (__mmask16) __U);
4092 }
4093
4094 extern __inline __m128i
4095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096 _mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
4097 {
4098 return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
4099 (__v16qi) __B,
4100 (__v16qi)
4101 _mm_avx512_setzero_si128 (),
4102 (__mmask16) __U);
4103 }
4104
4105 extern __inline __m128i
4106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4107 _mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
4108 __m128i __B)
4109 {
4110 return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
4111 (__v8hi) __B,
4112 (__v8hi) __W,
4113 (__mmask8) __U);
4114 }
4115
4116 extern __inline __m128i
4117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4118 _mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
4119 {
4120 return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
4121 (__v8hi) __B,
4122 (__v8hi)
4123 _mm_avx512_setzero_si128 (),
4124 (__mmask8) __U);
4125 }
4126
4127 extern __inline __m256i
4128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4129 _mm256_mask_srl_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4130 __m128i __B)
4131 {
4132 return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
4133 (__v8hi) __B,
4134 (__v16hi) __W,
4135 (__mmask16) __U);
4136 }
4137
4138 extern __inline __m256i
4139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4140 _mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
4141 {
4142 return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
4143 (__v8hi) __B,
4144 (__v16hi)
4145 _mm256_avx512_setzero_si256 (),
4146 (__mmask16) __U);
4147 }
4148
4149 extern __inline __m128i
4150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4151 _mm_mask_srl_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4152 __m128i __B)
4153 {
4154 return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
4155 (__v8hi) __B,
4156 (__v8hi) __W,
4157 (__mmask8) __U);
4158 }
4159
4160 extern __inline __m128i
4161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4162 _mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4163 {
4164 return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
4165 (__v8hi) __B,
4166 (__v8hi)
4167 _mm_avx512_setzero_si128 (),
4168 (__mmask8) __U);
4169 }
4170
4171 extern __inline __m256i
4172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4173 _mm256_mask_sra_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4174 __m128i __B)
4175 {
4176 return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
4177 (__v8hi) __B,
4178 (__v16hi) __W,
4179 (__mmask16) __U);
4180 }
4181
4182 extern __inline __m256i
4183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4184 _mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
4185 {
4186 return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
4187 (__v8hi) __B,
4188 (__v16hi)
4189 _mm256_avx512_setzero_si256 (),
4190 (__mmask16) __U);
4191 }
4192
4193 extern __inline __m128i
4194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4195 _mm_mask_sra_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4196 __m128i __B)
4197 {
4198 return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
4199 (__v8hi) __B,
4200 (__v8hi) __W,
4201 (__mmask8) __U);
4202 }
4203
4204 extern __inline __m128i
4205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4206 _mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4207 {
4208 return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
4209 (__v8hi) __B,
4210 (__v8hi)
4211 _mm_avx512_setzero_si128 (),
4212 (__mmask8) __U);
4213 }
4214
4215 extern __inline __m128i
4216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4217 _mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4218 {
4219 return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
4220 (__v8hi) __B,
4221 (__v8hi)
4222 _mm_avx512_setzero_si128 (),
4223 (__mmask8) __U);
4224 }
4225
4226 extern __inline __m128i
4227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4228 _mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
4229 __m128i __B)
4230 {
4231 return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
4232 (__v16qi) __B,
4233 (__v16qi) __W,
4234 (__mmask16) __U);
4235 }
4236
4237 extern __inline __m128i
4238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4239 _mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
4240 {
4241 return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
4242 (__v16qi) __B,
4243 (__v16qi)
4244 _mm_avx512_setzero_si128 (),
4245 (__mmask16) __U);
4246 }
4247
4248 extern __inline __m128i
4249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4250 _mm_mask_adds_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
4251 __m128i __B)
4252 {
4253 return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
4254 (__v8hi) __B,
4255 (__v8hi) __W,
4256 (__mmask8) __U);
4257 }
4258
4259 extern __inline __m128i
4260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4261 _mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
4262 {
4263 return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
4264 (__v8hi) __B,
4265 (__v8hi)
4266 _mm_avx512_setzero_si128 (),
4267 (__mmask8) __U);
4268 }
4269
4270 extern __inline __m128i
4271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4272 _mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
4273 __m128i __B)
4274 {
4275 return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
4276 (__v16qi) __B,
4277 (__v16qi) __W,
4278 (__mmask16) __U);
4279 }
4280
4281 extern __inline __m128i
4282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4283 _mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
4284 {
4285 return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
4286 (__v16qi) __B,
4287 (__v16qi)
4288 _mm_avx512_setzero_si128 (),
4289 (__mmask16) __U);
4290 }
4291
4292 extern __inline __m128i
4293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4294 _mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4295 __m128i __B)
4296 {
4297 return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
4298 (__v8hi) __B,
4299 (__v8hi) __W,
4300 (__mmask8) __U);
4301 }
4302
4303 extern __inline __m128i
4304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4305 _mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4306 {
4307 return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
4308 (__v8hi) __B,
4309 (__v8hi)
4310 _mm_avx512_setzero_si128 (),
4311 (__mmask8) __U);
4312 }
4313
4314 extern __inline __m128i
4315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316 _mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
4317 __m128i __B)
4318 {
4319 return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
4320 (__v16qi) __B,
4321 (__v16qi) __W,
4322 (__mmask16) __U);
4323 }
4324
4325 extern __inline __m128i
4326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4327 _mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
4328 {
4329 return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
4330 (__v16qi) __B,
4331 (__v16qi)
4332 _mm_avx512_setzero_si128 (),
4333 (__mmask16) __U);
4334 }
4335
4336 extern __inline __m128i
4337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338 _mm_cvtepi16_epi8 (__m128i __A)
4339 {
4340
4341 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4342 (__v16qi)_mm_avx512_undefined_si128(),
4343 (__mmask8) -1);
4344 }
4345
4346 extern __inline void
4347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4348 _mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
4349 {
4350 __builtin_ia32_pmovwb128mem_mask ((unsigned long long *) __P , (__v8hi) __A, __M);
4351 }
4352
4353 extern __inline __m128i
4354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4355 _mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
4356 {
4357 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4358 (__v16qi) __O, __M);
4359 }
4360
4361 extern __inline __m128i
4362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4363 _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A)
4364 {
4365 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4366 (__v16qi)
4367 _mm_avx512_setzero_si128 (),
4368 __M);
4369 }
4370
4371 extern __inline __m256i
4372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4373 _mm256_srav_epi16 (__m256i __A, __m256i __B)
4374 {
4375 return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4376 (__v16hi) __B,
4377 (__v16hi)
4378 _mm256_avx512_setzero_si256 (),
4379 (__mmask16) -1);
4380 }
4381
4382 extern __inline __m256i
4383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4384 _mm256_mask_srav_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4385 __m256i __B)
4386 {
4387 return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4388 (__v16hi) __B,
4389 (__v16hi) __W,
4390 (__mmask16) __U);
4391 }
4392
4393 extern __inline __m256i
4394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4395 _mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4396 {
4397 return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4398 (__v16hi) __B,
4399 (__v16hi)
4400 _mm256_avx512_setzero_si256 (),
4401 (__mmask16) __U);
4402 }
4403
4404 extern __inline __m128i
4405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4406 _mm_srav_epi16 (__m128i __A, __m128i __B)
4407 {
4408 return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4409 (__v8hi) __B,
4410 (__v8hi)
4411 _mm_avx512_setzero_si128 (),
4412 (__mmask8) -1);
4413 }
4414
4415 extern __inline __m128i
4416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4417 _mm_mask_srav_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4418 __m128i __B)
4419 {
4420 return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4421 (__v8hi) __B,
4422 (__v8hi) __W,
4423 (__mmask8) __U);
4424 }
4425
4426 extern __inline __m128i
4427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4428 _mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4429 {
4430 return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4431 (__v8hi) __B,
4432 (__v8hi)
4433 _mm_avx512_setzero_si128 (),
4434 (__mmask8) __U);
4435 }
4436
4437 extern __inline __m256i
4438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439 _mm256_srlv_epi16 (__m256i __A, __m256i __B)
4440 {
4441 return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4442 (__v16hi) __B,
4443 (__v16hi)
4444 _mm256_avx512_setzero_si256 (),
4445 (__mmask16) -1);
4446 }
4447
4448 extern __inline __m256i
4449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4450 _mm256_mask_srlv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4451 __m256i __B)
4452 {
4453 return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4454 (__v16hi) __B,
4455 (__v16hi) __W,
4456 (__mmask16) __U);
4457 }
4458
4459 extern __inline __m256i
4460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4461 _mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4462 {
4463 return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4464 (__v16hi) __B,
4465 (__v16hi)
4466 _mm256_avx512_setzero_si256 (),
4467 (__mmask16) __U);
4468 }
4469
4470 extern __inline __m128i
4471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4472 _mm_srlv_epi16 (__m128i __A, __m128i __B)
4473 {
4474 return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4475 (__v8hi) __B,
4476 (__v8hi)
4477 _mm_avx512_setzero_si128 (),
4478 (__mmask8) -1);
4479 }
4480
4481 extern __inline __m128i
4482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4483 _mm_mask_srlv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4484 __m128i __B)
4485 {
4486 return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4487 (__v8hi) __B,
4488 (__v8hi) __W,
4489 (__mmask8) __U);
4490 }
4491
4492 extern __inline __m128i
4493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4494 _mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4495 {
4496 return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4497 (__v8hi) __B,
4498 (__v8hi)
4499 _mm_avx512_setzero_si128 (),
4500 (__mmask8) __U);
4501 }
4502
4503 extern __inline __m256i
4504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4505 _mm256_sllv_epi16 (__m256i __A, __m256i __B)
4506 {
4507 return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4508 (__v16hi) __B,
4509 (__v16hi)
4510 _mm256_avx512_setzero_si256 (),
4511 (__mmask16) -1);
4512 }
4513
4514 extern __inline __m256i
4515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4516 _mm256_mask_sllv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4517 __m256i __B)
4518 {
4519 return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4520 (__v16hi) __B,
4521 (__v16hi) __W,
4522 (__mmask16) __U);
4523 }
4524
4525 extern __inline __m256i
4526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4527 _mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4528 {
4529 return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4530 (__v16hi) __B,
4531 (__v16hi)
4532 _mm256_avx512_setzero_si256 (),
4533 (__mmask16) __U);
4534 }
4535
4536 extern __inline __m128i
4537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4538 _mm_sllv_epi16 (__m128i __A, __m128i __B)
4539 {
4540 return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4541 (__v8hi) __B,
4542 (__v8hi)
4543 _mm_avx512_setzero_si128 (),
4544 (__mmask8) -1);
4545 }
4546
4547 extern __inline __m128i
4548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4549 _mm_mask_sllv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4550 __m128i __B)
4551 {
4552 return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4553 (__v8hi) __B,
4554 (__v8hi) __W,
4555 (__mmask8) __U);
4556 }
4557
4558 extern __inline __m128i
4559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4560 _mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4561 {
4562 return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4563 (__v8hi) __B,
4564 (__v8hi)
4565 _mm_avx512_setzero_si128 (),
4566 (__mmask8) __U);
4567 }
4568
4569 extern __inline __m128i
4570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4571 _mm_mask_sll_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4572 __m128i __B)
4573 {
4574 return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
4575 (__v8hi) __B,
4576 (__v8hi) __W,
4577 (__mmask8) __U);
4578 }
4579
4580 extern __inline __m128i
4581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4582 _mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4583 {
4584 return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
4585 (__v8hi) __B,
4586 (__v8hi)
4587 _mm_avx512_setzero_si128 (),
4588 (__mmask8) __U);
4589 }
4590
4591 extern __inline __m256i
4592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4593 _mm256_mask_sll_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4594 __m128i __B)
4595 {
4596 return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
4597 (__v8hi) __B,
4598 (__v16hi) __W,
4599 (__mmask16) __U);
4600 }
4601
4602 extern __inline __m256i
4603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4604 _mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
4605 {
4606 return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
4607 (__v8hi) __B,
4608 (__v16hi)
4609 _mm256_avx512_setzero_si256 (),
4610 (__mmask16) __U);
4611 }
4612
4613 extern __inline __m256i
4614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4615 _mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
4616 {
4617 return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
4618 (__v8si) __B,
4619 (__v16hi)
4620 _mm256_avx512_setzero_si256 (),
4621 __M);
4622 }
4623
4624 extern __inline __m256i
4625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4626 _mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
4627 __m256i __B)
4628 {
4629 return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
4630 (__v8si) __B,
4631 (__v16hi) __W,
4632 __M);
4633 }
4634
4635 extern __inline __m128i
4636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4637 _mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
4638 {
4639 return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
4640 (__v4si) __B,
4641 (__v8hi)
4642 _mm_avx512_setzero_si128 (),
4643 __M);
4644 }
4645
4646 extern __inline __m128i
4647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4648 _mm_mask_packus_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
4649 __m128i __B)
4650 {
4651 return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
4652 (__v4si) __B,
4653 (__v8hi) __W, __M);
4654 }
4655
4656 extern __inline __m256i
4657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4658 _mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
4659 {
4660 return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
4661 (__v8si) __B,
4662 (__v16hi)
4663 _mm256_avx512_setzero_si256 (),
4664 __M);
4665 }
4666
4667 extern __inline __m256i
4668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4669 _mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
4670 __m256i __B)
4671 {
4672 return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
4673 (__v8si) __B,
4674 (__v16hi) __W,
4675 __M);
4676 }
4677
4678 extern __inline __m128i
4679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4680 _mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
4681 {
4682 return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
4683 (__v4si) __B,
4684 (__v8hi)
4685 _mm_avx512_setzero_si128 (),
4686 __M);
4687 }
4688
4689 extern __inline __m128i
4690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4691 _mm_mask_packs_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
4692 __m128i __B)
4693 {
4694 return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
4695 (__v4si) __B,
4696 (__v8hi) __W, __M);
4697 }
4698
4699 extern __inline __mmask16
4700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4701 _mm_mask_cmpneq_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4702 {
4703 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4704 (__v16qi) __Y, 4,
4705 (__mmask16) __M);
4706 }
4707
4708 extern __inline __mmask16
4709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4710 _mm_mask_cmplt_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4711 {
4712 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4713 (__v16qi) __Y, 1,
4714 (__mmask16) __M);
4715 }
4716
4717 extern __inline __mmask16
4718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4719 _mm_mask_cmpge_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4720 {
4721 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4722 (__v16qi) __Y, 5,
4723 (__mmask16) __M);
4724 }
4725
4726 extern __inline __mmask16
4727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4728 _mm_mask_cmple_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4729 {
4730 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4731 (__v16qi) __Y, 2,
4732 (__mmask16) __M);
4733 }
4734
4735 extern __inline __mmask8
4736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737 _mm_mask_cmpneq_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4738 {
4739 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4740 (__v8hi) __Y, 4,
4741 (__mmask8) __M);
4742 }
4743
4744 extern __inline __mmask8
4745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746 _mm_mask_cmplt_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4747 {
4748 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4749 (__v8hi) __Y, 1,
4750 (__mmask8) __M);
4751 }
4752
4753 extern __inline __mmask8
4754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4755 _mm_mask_cmpge_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4756 {
4757 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4758 (__v8hi) __Y, 5,
4759 (__mmask8) __M);
4760 }
4761
4762 extern __inline __mmask8
4763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4764 _mm_mask_cmple_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4765 {
4766 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4767 (__v8hi) __Y, 2,
4768 (__mmask8) __M);
4769 }
4770
4771 extern __inline __mmask16
4772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4773 _mm_mask_cmpneq_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4774 {
4775 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4776 (__v16qi) __Y, 4,
4777 (__mmask16) __M);
4778 }
4779
4780 extern __inline __mmask16
4781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4782 _mm_mask_cmplt_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4783 {
4784 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4785 (__v16qi) __Y, 1,
4786 (__mmask16) __M);
4787 }
4788
4789 extern __inline __mmask16
4790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4791 _mm_mask_cmpge_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4792 {
4793 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4794 (__v16qi) __Y, 5,
4795 (__mmask16) __M);
4796 }
4797
4798 extern __inline __mmask16
4799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4800 _mm_mask_cmple_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4801 {
4802 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4803 (__v16qi) __Y, 2,
4804 (__mmask16) __M);
4805 }
4806
4807 extern __inline __mmask8
4808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4809 _mm_mask_cmpneq_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4810 {
4811 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4812 (__v8hi) __Y, 4,
4813 (__mmask8) __M);
4814 }
4815
4816 extern __inline __mmask8
4817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4818 _mm_mask_cmplt_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4819 {
4820 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4821 (__v8hi) __Y, 1,
4822 (__mmask8) __M);
4823 }
4824
4825 extern __inline __mmask8
4826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827 _mm_mask_cmpge_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4828 {
4829 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4830 (__v8hi) __Y, 5,
4831 (__mmask8) __M);
4832 }
4833
4834 extern __inline __mmask8
4835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4836 _mm_mask_cmple_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4837 {
4838 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4839 (__v8hi) __Y, 2,
4840 (__mmask8) __M);
4841 }
4842
4843 extern __inline __mmask32
4844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4845 _mm256_mask_cmpneq_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4846 {
4847 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4848 (__v32qi) __Y, 4,
4849 (__mmask32) __M);
4850 }
4851
4852 extern __inline __mmask32
4853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4854 _mm256_mask_cmplt_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4855 {
4856 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4857 (__v32qi) __Y, 1,
4858 (__mmask32) __M);
4859 }
4860
4861 extern __inline __mmask32
4862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4863 _mm256_mask_cmpge_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4864 {
4865 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4866 (__v32qi) __Y, 5,
4867 (__mmask32) __M);
4868 }
4869
4870 extern __inline __mmask32
4871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4872 _mm256_mask_cmple_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4873 {
4874 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4875 (__v32qi) __Y, 2,
4876 (__mmask32) __M);
4877 }
4878
4879 extern __inline __mmask16
4880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4881 _mm256_mask_cmpneq_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4882 {
4883 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4884 (__v16hi) __Y, 4,
4885 (__mmask16) __M);
4886 }
4887
4888 extern __inline __mmask16
4889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4890 _mm256_mask_cmplt_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4891 {
4892 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4893 (__v16hi) __Y, 1,
4894 (__mmask16) __M);
4895 }
4896
4897 extern __inline __mmask16
4898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4899 _mm256_mask_cmpge_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4900 {
4901 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4902 (__v16hi) __Y, 5,
4903 (__mmask16) __M);
4904 }
4905
4906 extern __inline __mmask16
4907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4908 _mm256_mask_cmple_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4909 {
4910 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4911 (__v16hi) __Y, 2,
4912 (__mmask16) __M);
4913 }
4914
4915 extern __inline __mmask32
4916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4917 _mm256_mask_cmpneq_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4918 {
4919 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4920 (__v32qi) __Y, 4,
4921 (__mmask32) __M);
4922 }
4923
4924 extern __inline __mmask32
4925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4926 _mm256_mask_cmplt_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4927 {
4928 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4929 (__v32qi) __Y, 1,
4930 (__mmask32) __M);
4931 }
4932
4933 extern __inline __mmask32
4934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4935 _mm256_mask_cmpge_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4936 {
4937 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4938 (__v32qi) __Y, 5,
4939 (__mmask32) __M);
4940 }
4941
4942 extern __inline __mmask32
4943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4944 _mm256_mask_cmple_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
4945 {
4946 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4947 (__v32qi) __Y, 2,
4948 (__mmask32) __M);
4949 }
4950
4951 extern __inline __mmask16
4952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4953 _mm256_mask_cmpneq_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4954 {
4955 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4956 (__v16hi) __Y, 4,
4957 (__mmask16) __M);
4958 }
4959
4960 extern __inline __mmask16
4961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4962 _mm256_mask_cmplt_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4963 {
4964 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4965 (__v16hi) __Y, 1,
4966 (__mmask16) __M);
4967 }
4968
4969 extern __inline __mmask16
4970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4971 _mm256_mask_cmpge_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4972 {
4973 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4974 (__v16hi) __Y, 5,
4975 (__mmask16) __M);
4976 }
4977
4978 extern __inline __mmask16
4979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4980 _mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
4981 {
4982 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4983 (__v16hi) __Y, 2,
4984 (__mmask16) __M);
4985 }
4986
/* Masked horizontal add over the 16-bit lanes of the 128-bit vector __W.
   _mm_maskz_mov_epi16 is a zero-masking move, so lanes whose bit in __M is
   clear become 0 (the identity for +) before the reduction.  The reduction
   and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_BASIC_EPI16, which is defined outside this chunk and
   appears to read the local named __W -- confirm at its definition.  */
4987 extern __inline short
4988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4989 _mm_mask_reduce_add_epi16 (__mmask8 __M, __m128i __W)
4990 {
4991 __W = _mm_maskz_mov_epi16 (__M, __W);
4992 _MM_REDUCE_OPERATOR_BASIC_EPI16 (+);
4993 }
4994
/* Masked horizontal multiply over the 16-bit lanes of the 128-bit vector
   __W.  The merge-masking move takes lanes whose bit in __M is clear from a
   vector with every lane set to 1 (the identity for *).  The reduction and
   the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_BASIC_EPI16, which is defined outside this chunk and
   appears to read the local named __W -- confirm at its definition.  */
4995 extern __inline short
4996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4997 _mm_mask_reduce_mul_epi16 (__mmask8 __M, __m128i __W)
4998 {
4999 __W = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (1), __M, __W);
5000 _MM_REDUCE_OPERATOR_BASIC_EPI16 (*);
5001 }
5002
/* Masked horizontal bitwise AND over the 16-bit lanes of the 128-bit vector
   __W.  The merge-masking move takes lanes whose bit in __M is clear from a
   vector with every lane set to -1, i.e. all bits set (the identity for &).
   The reduction and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_BASIC_EPI16, which is defined outside this chunk and
   appears to read the local named __W -- confirm at its definition.  */
5003 extern __inline short
5004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5005 _mm_mask_reduce_and_epi16 (__mmask8 __M, __m128i __W)
5006 {
5007 __W = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (-1), __M, __W);
5008 _MM_REDUCE_OPERATOR_BASIC_EPI16 (&);
5009 }
5010
/* Masked horizontal bitwise OR over the 16-bit lanes of the 128-bit vector
   __W.  _mm_maskz_mov_epi16 is a zero-masking move, so lanes whose bit in
   __M is clear become 0 (the identity for |) before the reduction.  The
   reduction and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_BASIC_EPI16, which is defined outside this chunk and
   appears to read the local named __W -- confirm at its definition.  */
5011 extern __inline short
5012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5013 _mm_mask_reduce_or_epi16 (__mmask8 __M, __m128i __W)
5014 {
5015 __W = _mm_maskz_mov_epi16 (__M, __W);
5016 _MM_REDUCE_OPERATOR_BASIC_EPI16 (|);
5017 }
5018
/* Masked horizontal signed maximum over the 16-bit lanes of the 128-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by -32767-1
   (-32768, the smallest short) so they cannot win the maximum.  The
   reduction and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_MAX_MIN_EP16, defined outside this chunk; it receives
   the operation name as a token and appears to read the local named __V.
   NOTE(review): __M is declared __mmask16 here although the 128-bit
   add/mul/and/or epi16 reductions in this file take __mmask8 -- confirm
   whether that is intentional.  */
5019 extern __inline short
5020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5021 _mm_mask_reduce_max_epi16 (__mmask16 __M, __m128i __V)
5022 {
5023 __V = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (-32767-1), __M, __V);
5024 _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (avx512_max_epi16);
5025 }
5026
/* Masked horizontal unsigned maximum over the 16-bit lanes of the 128-bit
   vector __V.  The zero-masking move sets lanes whose bit in __M is clear
   to 0, the smallest unsigned value, so they cannot win the maximum.  The
   reduction and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_MAX_MIN_EP16, defined outside this chunk; it receives
   the operation name as a token and appears to read the local named __V.
   NOTE(review): __M is declared __mmask16 here although the 128-bit
   add/mul/and/or epi16 reductions in this file take __mmask8 -- confirm
   whether that is intentional.  */
5027 extern __inline unsigned short
5028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5029 _mm_mask_reduce_max_epu16 (__mmask16 __M, __m128i __V)
5030 {
5031 __V = _mm_maskz_mov_epi16 (__M, __V);
5032 _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (avx512_max_epu16);
5033 }
5034
/* Masked horizontal signed minimum over the 16-bit lanes of the 128-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by 32767 (the
   largest short) so they cannot win the minimum.  The reduction and the
   return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_MAX_MIN_EP16, defined outside this chunk; it receives
   the operation name as a token and appears to read the local named __V.
   NOTE(review): __M is declared __mmask16 here although the 128-bit
   add/mul/and/or epi16 reductions in this file take __mmask8 -- confirm
   whether that is intentional.  */
5035 extern __inline short
5036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5037 _mm_mask_reduce_min_epi16 (__mmask16 __M, __m128i __V)
5038 {
5039 __V = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (32767), __M, __V);
5040 _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (avx512_min_epi16);
5041 }
5042
/* Masked horizontal unsigned minimum over the 16-bit lanes of the 128-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by -1, i.e. all
   bits set, which is the largest value when read as unsigned, so they
   cannot win the minimum.  The reduction and the return statement are
   presumably supplied by _MM_REDUCE_OPERATOR_MAX_MIN_EP16, defined outside
   this chunk; it receives the operation name as a token and appears to read
   the local named __V.
   NOTE(review): __M is declared __mmask16 here although the 128-bit
   add/mul/and/or epi16 reductions in this file take __mmask8 -- confirm
   whether that is intentional.  */
5043 extern __inline unsigned short
5044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5045 _mm_mask_reduce_min_epu16 (__mmask16 __M, __m128i __V)
5046 {
5047 __V = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (-1), __M, __V);
5048 _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (avx512_min_epu16);
5049 }
5050
/* Masked horizontal add over the 16-bit lanes of the 256-bit vector __W.
   _mm256_maskz_mov_epi16 is a zero-masking move, so lanes whose bit in __M
   is clear become 0 (the identity for +) before the reduction.  The
   reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16, which is defined outside this
   chunk and appears to read the local named __W -- confirm there.  */
5051 extern __inline short
5052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5053 _mm256_mask_reduce_add_epi16 (__mmask16 __M, __m256i __W)
5054 {
5055 __W = _mm256_maskz_mov_epi16 (__M, __W);
5056 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16 (+);
5057 }
5058
/* Masked horizontal multiply over the 16-bit lanes of the 256-bit vector
   __W.  The merge-masking move takes lanes whose bit in __M is clear from a
   vector with every lane set to 1 (the identity for *).  The reduction and
   the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16, which is defined outside this
   chunk and appears to read the local named __W -- confirm there.  */
5059 extern __inline short
5060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5061 _mm256_mask_reduce_mul_epi16 (__mmask16 __M, __m256i __W)
5062 {
5063 __W = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (1), __M, __W);
5064 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16 (*);
5065 }
5066
/* Masked horizontal bitwise AND over the 16-bit lanes of the 256-bit vector
   __W.  The merge-masking move takes lanes whose bit in __M is clear from a
   vector with every lane set to -1, i.e. all bits set (the identity for &).
   The reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16, which is defined outside this
   chunk and appears to read the local named __W -- confirm there.  */
5067 extern __inline short
5068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5069 _mm256_mask_reduce_and_epi16 (__mmask16 __M, __m256i __W)
5070 {
5071 __W = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (-1), __M, __W);
5072 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16 (&);
5073 }
5074
/* Masked horizontal bitwise OR over the 16-bit lanes of the 256-bit vector
   __W.  _mm256_maskz_mov_epi16 is a zero-masking move, so lanes whose bit
   in __M is clear become 0 (the identity for |) before the reduction.  The
   reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16, which is defined outside this
   chunk and appears to read the local named __W -- confirm there.  */
5075 extern __inline short
5076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5077 _mm256_mask_reduce_or_epi16 (__mmask16 __M, __m256i __W)
5078 {
5079 __W = _mm256_maskz_mov_epi16 (__M, __W);
5080 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16 (|);
5081 }
5082
/* Masked horizontal signed maximum over the 16-bit lanes of the 256-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by -32767-1
   (-32768, the smallest short) so they cannot win the maximum.  The
   reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16, defined outside this chunk;
   it receives the operation name as a token and appears to read the local
   named __V -- confirm there.  */
5083 extern __inline short
5084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5085 _mm256_mask_reduce_max_epi16 (__mmask16 __M, __m256i __V)
5086 {
5087 __V = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (-32767-1), __M, __V);
5088 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16);
5089 }
5090
/* Masked horizontal unsigned maximum over the 16-bit lanes of the 256-bit
   vector __V.  The zero-masking move sets lanes whose bit in __M is clear
   to 0, the smallest unsigned value, so they cannot win the maximum.  The
   reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16, defined outside this chunk;
   it receives the operation name as a token and appears to read the local
   named __V -- confirm there.  */
5091 extern __inline unsigned short
5092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5093 _mm256_mask_reduce_max_epu16 (__mmask16 __M, __m256i __V)
5094 {
5095 __V = _mm256_maskz_mov_epi16 (__M, __V);
5096 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16);
5097 }
5098
/* Masked horizontal signed minimum over the 16-bit lanes of the 256-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by 32767 (the
   largest short) so they cannot win the minimum.  The reduction and the
   return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16, defined outside this chunk;
   it receives the operation name as a token and appears to read the local
   named __V -- confirm there.  */
5099 extern __inline short
5100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5101 _mm256_mask_reduce_min_epi16 (__mmask16 __M, __m256i __V)
5102 {
5103 __V = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (32767), __M, __V);
5104 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16);
5105 }
5106
/* Masked horizontal unsigned minimum over the 16-bit lanes of the 256-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by -1, i.e. all
   bits set, which is the largest value when read as unsigned, so they
   cannot win the minimum.  The reduction and the return statement are
   presumably supplied by _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16,
   defined outside this chunk; it receives the operation name as a token and
   appears to read the local named __V -- confirm there.  */
5107 extern __inline unsigned short
5108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5109 _mm256_mask_reduce_min_epu16 (__mmask16 __M, __m256i __V)
5110 {
5111 __V = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (-1), __M, __V);
5112 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16);
5113 }
5114
/* Masked horizontal add over the 8-bit lanes of the 128-bit vector __W.
   _mm_maskz_mov_epi8 is a zero-masking move, so lanes whose bit in __M is
   clear become 0 (the identity for +) before the reduction.  The reduction
   and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_BASIC_EPI8, which is defined outside this chunk and
   appears to read the local named __W -- confirm at its definition.  */
5115 extern __inline char
5116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5117 _mm_mask_reduce_add_epi8 (__mmask16 __M, __m128i __W)
5118 {
5119 __W = _mm_maskz_mov_epi8 (__M, __W);
5120 _MM_REDUCE_OPERATOR_BASIC_EPI8 (+);
5121 }
5122
/* Masked horizontal multiply over the 8-bit lanes of the 128-bit vector
   __W.  The merge-masking move takes lanes whose bit in __M is clear from a
   vector with every lane set to 1 (the identity for *).  The reduction and
   the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_BASIC_EPI8, which is defined outside this chunk and
   appears to read the local named __W -- confirm at its definition.  */
5123 extern __inline char
5124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5125 _mm_mask_reduce_mul_epi8 (__mmask16 __M, __m128i __W)
5126 {
5127 __W = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (1), __M, __W);
5128 _MM_REDUCE_OPERATOR_BASIC_EPI8 (*);
5129 }
5130
/* Masked horizontal bitwise AND over the 8-bit lanes of the 128-bit vector
   __W.  The merge-masking move takes lanes whose bit in __M is clear from a
   vector with every lane set to -1, i.e. all bits set (the identity for &).
   The reduction and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_BASIC_EPI8, which is defined outside this chunk and
   appears to read the local named __W -- confirm at its definition.  */
5131 extern __inline char
5132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133 _mm_mask_reduce_and_epi8 (__mmask16 __M, __m128i __W)
5134 {
5135 __W = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (-1), __M, __W);
5136 _MM_REDUCE_OPERATOR_BASIC_EPI8 (&);
5137 }
5138
/* Masked horizontal bitwise OR over the 8-bit lanes of the 128-bit vector
   __W.  _mm_maskz_mov_epi8 is a zero-masking move, so lanes whose bit in
   __M is clear become 0 (the identity for |) before the reduction.  The
   reduction and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_BASIC_EPI8, which is defined outside this chunk and
   appears to read the local named __W -- confirm at its definition.  */
5139 extern __inline char
5140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5141 _mm_mask_reduce_or_epi8 (__mmask16 __M, __m128i __W)
5142 {
5143 __W = _mm_maskz_mov_epi8 (__M, __W);
5144 _MM_REDUCE_OPERATOR_BASIC_EPI8 (|);
5145 }
5146
/* Masked horizontal signed maximum over the 8-bit lanes of the 128-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by -127-1
   (-128, the smallest signed char) so they cannot win the maximum.  The
   reduction and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_MAX_MIN_EP8, defined outside this chunk; it receives
   the operation name as a token and appears to read the local named __V --
   confirm there.  */
5147 extern __inline signed char
5148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149 _mm_mask_reduce_max_epi8 (__mmask16 __M, __m128i __V)
5150 {
5151 __V = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (-127-1), __M, __V);
5152 _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (avx512_max_epi8);
5153 }
5154
/* Masked horizontal unsigned maximum over the 8-bit lanes of the 128-bit
   vector __V.  The zero-masking move sets lanes whose bit in __M is clear
   to 0, the smallest unsigned value, so they cannot win the maximum.  The
   reduction and the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_MAX_MIN_EP8, defined outside this chunk; it receives
   the operation name as a token and appears to read the local named __V --
   confirm there.  */
5155 extern __inline unsigned char
5156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5157 _mm_mask_reduce_max_epu8 (__mmask16 __M, __m128i __V)
5158 {
5159 __V = _mm_maskz_mov_epi8 (__M, __V);
5160 _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (avx512_max_epu8);
5161 }
5162
/* Masked horizontal signed minimum over the 8-bit lanes of the 128-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by 127 (the
   largest signed char) so they cannot win the minimum.  The reduction and
   the return statement are presumably supplied by
   _MM_REDUCE_OPERATOR_MAX_MIN_EP8, defined outside this chunk; it receives
   the operation name as a token and appears to read the local named __V --
   confirm there.  */
5163 extern __inline signed char
5164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5165 _mm_mask_reduce_min_epi8 (__mmask16 __M, __m128i __V)
5166 {
5167 __V = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (127), __M, __V);
5168 _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (avx512_min_epi8);
5169 }
5170
/* Masked horizontal unsigned minimum over the 8-bit lanes of the 128-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by -1, i.e. all
   bits set, which is the largest value when read as unsigned, so they
   cannot win the minimum.  The reduction and the return statement are
   presumably supplied by _MM_REDUCE_OPERATOR_MAX_MIN_EP8, defined outside
   this chunk; it receives the operation name as a token and appears to read
   the local named __V -- confirm there.  */
5171 extern __inline unsigned char
5172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5173 _mm_mask_reduce_min_epu8 (__mmask16 __M, __m128i __V)
5174 {
5175 __V = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (-1), __M, __V);
5176 _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (avx512_min_epu8);
5177 }
5178
/* Masked horizontal add over the 8-bit lanes of the 256-bit vector __W.
   _mm256_maskz_mov_epi8 is a zero-masking move, so lanes whose bit in __M
   is clear become 0 (the identity for +) before the reduction.  The
   reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8, which is defined outside this
   chunk and appears to read the local named __W -- confirm there.  */
5179 extern __inline char
5180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5181 _mm256_mask_reduce_add_epi8 (__mmask32 __M, __m256i __W)
5182 {
5183 __W = _mm256_maskz_mov_epi8 (__M, __W);
5184 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8 (+);
5185 }
5186
/* Masked horizontal multiply over the 8-bit lanes of the 256-bit vector
   __W.  The merge-masking move takes lanes whose bit in __M is clear from a
   vector with every lane set to 1 (the identity for *).  The reduction and
   the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8, which is defined outside this
   chunk and appears to read the local named __W -- confirm there.  */
5187 extern __inline char
5188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5189 _mm256_mask_reduce_mul_epi8 (__mmask32 __M, __m256i __W)
5190 {
5191 __W = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (1), __M, __W);
5192 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8 (*);
5193 }
5194
/* Masked horizontal bitwise AND over the 8-bit lanes of the 256-bit vector
   __W.  The merge-masking move takes lanes whose bit in __M is clear from a
   vector with every lane set to -1, i.e. all bits set (the identity for &).
   The reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8, which is defined outside this
   chunk and appears to read the local named __W -- confirm there.  */
5195 extern __inline char
5196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197 _mm256_mask_reduce_and_epi8 (__mmask32 __M, __m256i __W)
5198 {
5199 __W = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (-1), __M, __W);
5200 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8 (&);
5201 }
5202
/* Masked horizontal bitwise OR over the 8-bit lanes of the 256-bit vector
   __W.  _mm256_maskz_mov_epi8 is a zero-masking move, so lanes whose bit in
   __M is clear become 0 (the identity for |) before the reduction.  The
   reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8, which is defined outside this
   chunk and appears to read the local named __W -- confirm there.  */
5203 extern __inline char
5204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5205 _mm256_mask_reduce_or_epi8 (__mmask32 __M, __m256i __W)
5206 {
5207 __W = _mm256_maskz_mov_epi8 (__M, __W);
5208 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8 (|);
5209 }
5210
/* Masked horizontal signed maximum over the 8-bit lanes of the 256-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by -127-1
   (-128, the smallest signed char) so they cannot win the maximum.  The
   reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8, defined outside this chunk;
   it receives the operation name as a token and appears to read the local
   named __V -- confirm there.  */
5211 extern __inline signed char
5212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5213 _mm256_mask_reduce_max_epi8 (__mmask32 __M, __m256i __V)
5214 {
5215 __V = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (-127-1), __M, __V);
5216 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8);
5217 }
5218
/* Masked horizontal unsigned maximum over the 8-bit lanes of the 256-bit
   vector __V.  The zero-masking move sets lanes whose bit in __M is clear
   to 0, the smallest unsigned value, so they cannot win the maximum.  The
   reduction and the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8, defined outside this chunk;
   it receives the operation name as a token and appears to read the local
   named __V -- confirm there.  */
5219 extern __inline unsigned char
5220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5221 _mm256_mask_reduce_max_epu8 (__mmask32 __M, __m256i __V)
5222 {
5223 __V = _mm256_maskz_mov_epi8 (__M, __V);
5224 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8);
5225 }
5226
/* Masked horizontal signed minimum over the 8-bit lanes of the 256-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by 127 (the
   largest signed char) so they cannot win the minimum.  The reduction and
   the return statement are presumably supplied by
   _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8, defined outside this chunk;
   it receives the operation name as a token and appears to read the local
   named __V -- confirm there.  */
5227 extern __inline signed char
5228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5229 _mm256_mask_reduce_min_epi8 (__mmask32 __M, __m256i __V)
5230 {
5231 __V = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (127), __M, __V);
5232 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8);
5233 }
5234
/* Masked horizontal unsigned minimum over the 8-bit lanes of the 256-bit
   vector __V.  Lanes whose bit in __M is clear are replaced by -1, i.e. all
   bits set, which is the largest value when read as unsigned, so they
   cannot win the minimum.  The reduction and the return statement are
   presumably supplied by _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8, defined
   outside this chunk; it receives the operation name as a token and appears
   to read the local named __V -- confirm there.  */
5235 extern __inline unsigned char
5236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5237 _mm256_mask_reduce_min_epu8 (__mmask32 __M, __m256i __V)
5238 {
5239 __V = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (-1), __M, __V);
5240 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8);
5241 }
5242
/* Undo the push_options / target pragma issued near the top of this header.
   __DISABLE_AVX512VLBW__ is only defined when the header had to switch
   target options itself, so the pop happens only in that case.  */
5243 #ifdef __DISABLE_AVX512VLBW__
5244 #undef __DISABLE_AVX512VLBW__
5245 #pragma GCC pop_options
5246 #endif /* __DISABLE_AVX512VLBW__ */
5247
5248 #endif /* _AVX512VLBWINTRIN_H_INCLUDED */