]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512vlbwintrin.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / avx512vlbwintrin.h
CommitLineData
a945c346 1/* Copyright (C) 2014-2024 Free Software Foundation, Inc.
936c0fe4
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512VLBWINTRIN_H_INCLUDED
29#define _AVX512VLBWINTRIN_H_INCLUDED
30
fd514717 31#if !defined(__AVX512VL__) || !defined(__AVX512BW__) || defined (__EVEX512__)
936c0fe4 32#pragma GCC push_options
fd514717 33#pragma GCC target("avx512vl,avx512bw,no-evex512")
936c0fe4
AI
34#define __DISABLE_AVX512VLBW__
35#endif /* __AVX512VLBW__ */
36
93103603
SP
/* Internal data types for implementing the intrinsics.  Each is a short or
   char vector of 16 or 32 bytes declared with __aligned__ (1) and
   __may_alias__, so it can be dereferenced at any address and through any
   pointer type.  */
typedef short __v16hi_u
  __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
typedef short __v8hi_u
  __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef char __v32qi_u
  __attribute__ ((__vector_size__ (32), __may_alias__, __aligned__ (1)));
typedef char __v16qi_u
  __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
936c0fe4 46
e9529ff3
HJ
47extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
48_mm_avx512_set1_epi32 (int __A)
49{
50 return _mm_avx512_set_epi32 (__A, __A, __A, __A);
51}
52
53extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
54_mm_avx512_set1_epi16 (short __A)
55{
56 return _mm_avx512_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
57}
58
59extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
60_mm_avx512_set1_epi8 (char __A)
61{
62 return _mm_avx512_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
63 __A, __A, __A, __A, __A, __A, __A, __A);
64}
65
66extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
67_mm256_avx512_set_epi16 (short __q15, short __q14, short __q13, short __q12,
68 short __q11, short __q10, short __q09, short __q08,
69 short __q07, short __q06, short __q05, short __q04,
70 short __q03, short __q02, short __q01, short __q00)
71{
72 return __extension__ (__m256i)(__v16hi){
73 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
74 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
75 };
76}
77
78extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
79_mm256_avx512_set_epi8 (char __q31, char __q30, char __q29, char __q28,
80 char __q27, char __q26, char __q25, char __q24,
81 char __q23, char __q22, char __q21, char __q20,
82 char __q19, char __q18, char __q17, char __q16,
83 char __q15, char __q14, char __q13, char __q12,
84 char __q11, char __q10, char __q09, char __q08,
85 char __q07, char __q06, char __q05, char __q04,
86 char __q03, char __q02, char __q01, char __q00)
87{
88 return __extension__ (__m256i)(__v32qi){
89 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
90 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
91 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
92 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
93 };
94}
95
96extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
97_mm256_avx512_set1_epi16 (short __A)
98{
99 return _mm256_avx512_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
100 __A, __A, __A, __A, __A, __A, __A, __A);
101}
102
103extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
104_mm256_avx512_set1_epi32 (int __A)
105{
106 return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
107 __A, __A, __A, __A };
108}
109
110extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
111_mm256_avx512_set1_epi8 (char __A)
112{
113 return _mm256_avx512_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
114 __A, __A, __A, __A, __A, __A, __A, __A,
115 __A, __A, __A, __A, __A, __A, __A, __A,
116 __A, __A, __A, __A, __A, __A, __A, __A);
117}
118
119extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120_mm_avx512_max_epi16 (__m128i __A, __m128i __B)
121{
122 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
123}
124
125extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126_mm_avx512_min_epi16 (__m128i __A, __m128i __B)
127{
128 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
129}
130
131extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
132_mm_avx512_max_epu16 (__m128i __X, __m128i __Y)
133{
134 return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
135}
136
137extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
138_mm_avx512_min_epu16 (__m128i __X, __m128i __Y)
139{
140 return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
141}
142
143extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
144_mm_avx512_max_epi8 (__m128i __X, __m128i __Y)
145{
146 return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
147}
148
149extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
150_mm_avx512_min_epi8 (__m128i __X, __m128i __Y)
151{
152 return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
153}
154
155extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
156_mm_avx512_max_epu8 (__m128i __A, __m128i __B)
157{
158 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
159}
160
161extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
162_mm_avx512_min_epu8 (__m128i __A, __m128i __B)
163{
164 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
165}
166
936c0fe4
AI
167extern __inline __m256i
168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
169_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
170{
171 return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
172 (__v32qi) __W,
173 (__mmask32) __U);
174}
175
e9529ff3
HJ
176extern __inline __m256i
177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
178_mm256_avx512_max_epi16 (__m256i __A, __m256i __B)
179{
180 return (__m256i)__builtin_ia32_pmaxsw256 ((__v16hi)__A, (__v16hi)__B);
181}
182
183extern __inline __m256i
184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
185_mm256_avx512_min_epi16 (__m256i __A, __m256i __B)
186{
187 return (__m256i)__builtin_ia32_pminsw256 ((__v16hi)__A, (__v16hi)__B);
188}
189
190extern __inline __m256i
191__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192_mm256_avx512_max_epu16 (__m256i __A, __m256i __B)
193{
194 return (__m256i)__builtin_ia32_pmaxuw256 ((__v16hi)__A, (__v16hi)__B);
195}
196
197extern __inline __m256i
198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199_mm256_avx512_min_epu16 (__m256i __A, __m256i __B)
200{
201 return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__A, (__v16hi)__B);
202}
203
/* 128-bit insert/extract helpers for 256-bit vectors.  With __OPTIMIZE__
   defined they are always-inline functions; otherwise they are macros that
   substitute the selector argument textually (presumably because the
   builtins need it as a compile-time constant -- confirm against the GCC
   builtin documentation).  */
#ifdef __OPTIMIZE__
/* Pass __X, __Y and the selector __O to __builtin_ia32_vinsertf128_ps256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avx512_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
{
  __v8sf __whole = (__v8sf) __X;
  __v4sf __part = (__v4sf) __Y;
  return (__m256) __builtin_ia32_vinsertf128_ps256 (__whole, __part, __O);
}

/* Pass __X and the selector __N to __builtin_ia32_vextractf128_pd256.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avx512_extractf128_pd (__m256d __X, const int __N)
{
  __v4df __whole = (__v4df) __X;
  return (__m128d) __builtin_ia32_vextractf128_pd256 (__whole, __N);
}

/* Pass __X and the selector __M to __builtin_ia32_extract128i256.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avx512_extracti128_si256 (__m256i __X, const int __M)
{
  __v4di __whole = (__v4di) __X;
  return (__m128i) __builtin_ia32_extract128i256 (__whole, __M);
}
#else
#define _mm256_avx512_insertf128_ps(X, Y, O)                              \
  ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X),        \
                                              (__v4sf)(__m128)(Y),        \
                                              (int)(O)))

#define _mm256_avx512_extractf128_pd(X, N)                                \
  ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X),     \
                                                (int)(N)))

#define _mm256_avx512_extracti128_si256(X, M)                             \
  ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X),         \
                                            (int)(M)))
#endif
238
/* Function-body fragment: reduce the sixteen 16-bit elements of __W (a
   __m256i that must be in scope where the macro is expanded) with the
   binary operator OP and return the result.  The two 128-bit halves are
   combined first; the vector is then folded onto itself three times
   (upper four elements, then elements 2-3, then element 1) so that
   element 0 ends up holding the reduction.  */
#define _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16(op)                        \
  __v8hi __lo = (__v8hi) _mm256_avx512_extracti128_si256 (__W, 0);           \
  __v8hi __hi = (__v8hi) _mm256_avx512_extracti128_si256 (__W, 1);           \
  __v8hi __acc8 = __lo op __hi;                                              \
  __v8hi __fold4                                                             \
    = __builtin_shufflevector (__acc8, __acc8, 4, 5, 6, 7, 4, 5, 6, 7);      \
  __v8hi __acc4 = __acc8 op __fold4;                                         \
  __v8hi __fold2                                                             \
    = __builtin_shufflevector (__acc4, __acc4, 2, 3, 2, 3, 4, 5, 6, 7);      \
  __v8hi __acc2 = __acc4 op __fold2;                                         \
  __v8hi __fold1                                                             \
    = __builtin_shufflevector (__acc2, __acc2, 1, 1, 2, 3, 4, 5, 6, 7);      \
  __v8hi __acc1 = __acc2 op __fold1;                                         \
  return __acc1[0]
250
/* Function-body fragment: reduce the sixteen 16-bit elements of __V (a
   __m256i that must be in scope where the macro is expanded) with the
   intrinsic _mm_avx512_<OP> and return the result.  The two 128-bit halves
   are combined first; the vector is then folded onto itself three times
   (upper four elements, then elements 2-3, then element 1) so that
   element 0 ends up holding the reduction.  */
#define _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16(op)                       \
  __m128i __lo = _mm256_avx512_extracti128_si256 (__V, 0);                   \
  __m128i __hi = _mm256_avx512_extracti128_si256 (__V, 1);                   \
  __m128i __acc8 = _mm_avx512_##op (__lo, __hi);                             \
  __m128i __fold4                                                            \
    = (__m128i) __builtin_shufflevector ((__v8hi) __acc8, (__v8hi) __acc8,   \
                                         4, 5, 6, 7, 4, 5, 6, 7);            \
  __m128i __acc4 = _mm_avx512_##op (__acc8, __fold4);                        \
  __m128i __fold2                                                            \
    = (__m128i) __builtin_shufflevector ((__v8hi) __acc4, (__v8hi) __acc4,   \
                                         2, 3, 2, 3, 4, 5, 6, 7);            \
  __m128i __acc2 = _mm_avx512_##op (__acc4, __fold2);                        \
  __m128i __fold1                                                            \
    = (__m128i) __builtin_shufflevector ((__v8hi) __acc2, (__v8hi) __acc2,   \
                                         1, 1, 2, 3, 4, 5, 6, 7);            \
  __v8hi __acc1 = (__v8hi) _mm_avx512_##op (__acc2, __fold1);                \
  return __acc1[0]
265
/* Function-body fragment: reduce the thirty-two 8-bit elements of __W (a
   __m256i that must be in scope where the macro is expanded) with the
   binary operator OP and return the result.  The two 128-bit halves are
   combined first; the vector is then folded onto itself four times (upper
   eight elements, then elements 4-7, then 2-3, then element 1) so that
   element 0 ends up holding the reduction.  */
#define _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8(op)                         \
  __v16qi __lo = (__v16qi) _mm256_avx512_extracti128_si256 (__W, 0);         \
  __v16qi __hi = (__v16qi) _mm256_avx512_extracti128_si256 (__W, 1);         \
  __v16qi __acc16 = __lo op __hi;                                            \
  __v16qi __fold8 = __builtin_shufflevector (__acc16, __acc16,               \
    8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);             \
  __v16qi __acc8 = __acc16 op __fold8;                                       \
  __v16qi __fold4 = __builtin_shufflevector (__acc8, __acc8,                 \
    4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);                   \
  __v16qi __acc4 = __acc8 op __fold4;                                        \
  __v16qi __fold2 = __builtin_shufflevector (__acc4, __acc4,                 \
    2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);                   \
  __v16qi __acc2 = __acc4 op __fold2;                                        \
  __v16qi __fold1 = __builtin_shufflevector (__acc2, __acc2,                 \
    1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);                   \
  __v16qi __acc1 = __acc2 op __fold1;                                        \
  return __acc1[0]
283
/* Function-body fragment: reduce the thirty-two 8-bit elements of __V (a
   __m256i that must be in scope where the macro is expanded) with the
   intrinsic _mm_avx512_<OP> and return the result.  The two 128-bit halves
   are combined first; the vector is then folded onto itself four times
   (upper eight elements, then elements 4-7, then 2-3, then element 1) so
   that element 0 ends up holding the reduction.

   Every shuffle passes the same vector as both operands.  The third
   shuffle previously named __T5 as its second operand; because all of its
   indices are below 16 only the first operand was ever selected, so using
   __T7 for both does not change the result.  */
#define _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8(op) \
  __m128i __T1 = _mm256_avx512_extracti128_si256 (__V, 0); \
  __m128i __T2 = _mm256_avx512_extracti128_si256 (__V, 1); \
  __m128i __T3 = _mm_avx512_##op (__T1, __T2); \
  __m128i __T4 = (__m128i)__builtin_shufflevector ((__v16qi)__T3, \
                  (__v16qi)__T3, \
                  8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \
  __m128i __T5 = _mm_avx512_##op (__T3, __T4); \
  __m128i __T6 = (__m128i)__builtin_shufflevector ((__v16qi)__T5, \
                  (__v16qi)__T5, \
                  4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
  __m128i __T7 = _mm_avx512_##op (__T5, __T6); \
  __m128i __T8 = (__m128i)__builtin_shufflevector ((__v16qi)__T7, \
                  (__v16qi)__T7, \
                  2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
  __m128i __T9 = _mm_avx512_##op (__T7, __T8); \
  __m128i __T10 = (__m128i)__builtin_shufflevector ((__v16qi)__T9, \
                  (__v16qi)__T9, \
                  1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
  __v16qi __T11 = (__v16qi)_mm_avx512_##op (__T9, __T10); \
  return __T11[0]
305
936c0fe4
AI
306extern __inline __m256i
307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
308_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
309{
310 return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
311 (__v32qi)
fd79b414 312 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
313 (__mmask32) __U);
314}
315
316extern __inline __m128i
317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
318_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
319{
320 return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
321 (__v16qi) __W,
322 (__mmask16) __U);
323}
324
325extern __inline __m128i
326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
327_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
328{
329 return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
330 (__v16qi)
fd79b414 331 _mm_avx512_setzero_si128 (),
936c0fe4
AI
332 (__mmask16) __U);
333}
334
93103603
SP
335extern __inline void
336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337_mm256_storeu_epi8 (void *__P, __m256i __A)
338{
339 *(__v32qi_u *) __P = (__v32qi_u) __A;
340}
341
936c0fe4
AI
342extern __inline void
343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
344_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
345{
fc9cf6da 346 __builtin_ia32_storedquqi256_mask ((char *) __P,
936c0fe4
AI
347 (__v32qi) __A,
348 (__mmask32) __U);
349}
350
93103603
SP
351extern __inline void
352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
353_mm_storeu_epi8 (void *__P, __m128i __A)
354{
355 *(__v16qi_u *) __P = (__v16qi_u) __A;
356}
357
936c0fe4
AI
358extern __inline void
359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
360_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
361{
fc9cf6da 362 __builtin_ia32_storedquqi128_mask ((char *) __P,
936c0fe4
AI
363 (__v16qi) __A,
364 (__mmask16) __U);
365}
366
93103603
SP
367extern __inline __m256i
368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
369_mm256_loadu_epi16 (void const *__P)
370{
371 return (__m256i) (*(__v16hi_u *) __P);
372}
373
936c0fe4
AI
374extern __inline __m256i
375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
376_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
377{
fc9cf6da 378 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
936c0fe4
AI
379 (__v16hi) __W,
380 (__mmask16) __U);
381}
382
383extern __inline __m256i
384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
385_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
386{
fc9cf6da 387 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
936c0fe4 388 (__v16hi)
fd79b414 389 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
390 (__mmask16) __U);
391}
392
93103603
SP
393extern __inline __m128i
394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395_mm_loadu_epi16 (void const *__P)
396{
397 return (__m128i) (*(__v8hi_u *) __P);
398}
399
936c0fe4
AI
400extern __inline __m128i
401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
402_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
403{
fc9cf6da 404 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
936c0fe4
AI
405 (__v8hi) __W,
406 (__mmask8) __U);
407}
408
409extern __inline __m128i
410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
411_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
412{
fc9cf6da 413 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
936c0fe4 414 (__v8hi)
fd79b414 415 _mm_avx512_setzero_si128 (),
936c0fe4
AI
416 (__mmask8) __U);
417}
418
419
420extern __inline __m256i
421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
422_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
423{
424 return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
425 (__v16hi) __W,
426 (__mmask16) __U);
427}
428
429extern __inline __m256i
430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
431_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
432{
433 return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
434 (__v16hi)
fd79b414 435 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
436 (__mmask16) __U);
437}
438
439extern __inline __m128i
440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
441_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
442{
443 return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
444 (__v8hi) __W,
445 (__mmask8) __U);
446}
447
448extern __inline __m128i
449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
450_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
451{
452 return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
453 (__v8hi)
fd79b414 454 _mm_avx512_setzero_si128 (),
936c0fe4
AI
455 (__mmask8) __U);
456}
457
93103603
SP
458extern __inline __m256i
459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
460_mm256_loadu_epi8 (void const *__P)
461{
462 return (__m256i) (*(__v32qi_u *) __P);
463}
464
936c0fe4
AI
465extern __inline __m256i
466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
467_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
468{
fc9cf6da 469 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
936c0fe4
AI
470 (__v32qi) __W,
471 (__mmask32) __U);
472}
473
474extern __inline __m256i
475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
476_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
477{
fc9cf6da 478 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
936c0fe4 479 (__v32qi)
fd79b414 480 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
481 (__mmask32) __U);
482}
483
93103603
SP
484extern __inline __m128i
485__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
486_mm_loadu_epi8 (void const *__P)
487{
488 return (__m128i) (*(__v16qi_u *) __P);
489}
490
936c0fe4
AI
491extern __inline __m128i
492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
493_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
494{
fc9cf6da 495 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
936c0fe4
AI
496 (__v16qi) __W,
497 (__mmask16) __U);
498}
499
500extern __inline __m128i
501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
502_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
503{
fc9cf6da 504 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
936c0fe4 505 (__v16qi)
fd79b414 506 _mm_avx512_setzero_si128 (),
936c0fe4
AI
507 (__mmask16) __U);
508}
509
e8571019
HJ
510extern __inline __m128i
511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
513{
514 return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
515 (__v8hi) __W,
516 (__mmask8) __U);
517}
518
519extern __inline __m128i
520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
522{
523 return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
524 (__v16qi) __W,
525 (__mmask16) __U);
526}
527
528extern __inline __m256i
529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
530_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
531{
532 return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
533 (__v16hi) __W,
534 (__mmask16) __U);
535}
536
537extern __inline __m256i
538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
540{
541 return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
542 (__v32qi) __W,
543 (__mmask32) __U);
544}
545
936c0fe4
AI
546extern __inline __m128i
547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
548_mm256_cvtepi16_epi8 (__m256i __A)
549{
550
551 return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
4bbabb2a 552 (__v16qi)_mm_avx512_undefined_si128(),
936c0fe4
AI
553 (__mmask16) -1);
554}
555
c46f9051
JK
556extern __inline void
557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
558_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
559{
560 __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
561}
562
936c0fe4
AI
563extern __inline __m128i
564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
565_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
566{
567 return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
568 (__v16qi) __O, __M);
569}
570
571extern __inline __m128i
572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
573_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A)
574{
575 return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
576 (__v16qi)
fd79b414 577 _mm_avx512_setzero_si128 (),
936c0fe4
AI
578 __M);
579}
580
581extern __inline __m128i
582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
583_mm_cvtsepi16_epi8 (__m128i __A)
584{
585
586 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
4bbabb2a 587 (__v16qi)_mm_avx512_undefined_si128(),
936c0fe4
AI
588 (__mmask8) -1);
589}
590
c46f9051
JK
591extern __inline void
592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
593_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
594{
4a948703 595 __builtin_ia32_pmovswb128mem_mask ((unsigned long long *) __P , (__v8hi) __A, __M);
c46f9051
JK
596}
597
936c0fe4
AI
598extern __inline __m128i
599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
601{
602 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
603 (__v16qi) __O, __M);
604}
605
606extern __inline __m128i
607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
608_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A)
609{
610 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
611 (__v16qi)
fd79b414 612 _mm_avx512_setzero_si128 (),
936c0fe4
AI
613 __M);
614}
615
616extern __inline __m128i
617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
618_mm256_cvtsepi16_epi8 (__m256i __A)
619{
620
621 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
4bbabb2a 622 (__v16qi)_mm_avx512_undefined_si128(),
936c0fe4
AI
623 (__mmask16) -1);
624}
625
c46f9051
JK
626extern __inline void
627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
628_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
629{
630 __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
631}
632
936c0fe4
AI
633extern __inline __m128i
634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
635_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
636{
637 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
638 (__v16qi) __O, __M);
639}
640
641extern __inline __m128i
642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
643_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A)
644{
645 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
646 (__v16qi)
fd79b414 647 _mm_avx512_setzero_si128 (),
936c0fe4
AI
648 __M);
649}
650
651extern __inline __m128i
652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
653_mm_cvtusepi16_epi8 (__m128i __A)
654{
655
656 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
4bbabb2a 657 (__v16qi)_mm_avx512_undefined_si128(),
936c0fe4
AI
658 (__mmask8) -1);
659}
660
c46f9051
JK
661extern __inline void
662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
663_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
664{
4a948703 665 __builtin_ia32_pmovuswb128mem_mask ((unsigned long long *) __P , (__v8hi) __A, __M);
c46f9051
JK
666}
667
936c0fe4
AI
668extern __inline __m128i
669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
670_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
671{
672 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
673 (__v16qi) __O,
674 __M);
675}
676
677extern __inline __m128i
678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
679_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A)
680{
681 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
682 (__v16qi)
fd79b414 683 _mm_avx512_setzero_si128 (),
936c0fe4
AI
684 __M);
685}
686
687extern __inline __m128i
688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
689_mm256_cvtusepi16_epi8 (__m256i __A)
690{
691
692 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
4bbabb2a 693 (__v16qi)_mm_avx512_undefined_si128(),
936c0fe4
AI
694 (__mmask16) -1);
695}
696
c46f9051
JK
697extern __inline void
698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
700{
701 __builtin_ia32_pmovuswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
702}
703
936c0fe4
AI
704extern __inline __m128i
705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
706_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
707{
708 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
709 (__v16qi) __O,
710 __M);
711}
712
713extern __inline __m128i
714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
715_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A)
716{
717 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
718 (__v16qi)
fd79b414 719 _mm_avx512_setzero_si128 (),
936c0fe4
AI
720 __M);
721}
722
723extern __inline __m256i
724__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
725_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
726{
727 return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
728 (__v32qi) __O,
729 __M);
730}
731
732extern __inline __m256i
733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
734_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
735{
736 return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
737 (__v32qi)
fd79b414 738 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
739 __M);
740}
741
742extern __inline __m256i
743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
744_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
745{
746 return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
747 (__v32qi) __O,
748 __M);
749}
750
751extern __inline __m256i
752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
753_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
754{
755 return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
756 (__v32qi)
fd79b414 757 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
758 __M);
759}
760
761extern __inline __m128i
762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
764{
765 return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
766 (__v16qi) __O,
767 __M);
768}
769
770extern __inline __m128i
771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
772_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
773{
774 return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
775 (__v16qi)
fd79b414 776 _mm_avx512_setzero_si128 (),
936c0fe4
AI
777 __M);
778}
779
780extern __inline __m128i
781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
782_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
783{
784 return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
785 (__v16qi) __O,
786 __M);
787}
788
789extern __inline __m128i
790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
791_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
792{
793 return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
794 (__v16qi)
fd79b414 795 _mm_avx512_setzero_si128 (),
936c0fe4
AI
796 __M);
797}
798
799extern __inline __m256i
800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
801_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
802{
803 return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
804 (__v16hi) __O,
805 __M);
806}
807
808extern __inline __m256i
809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
810_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
811{
812 return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
813 (__v16hi)
fd79b414 814 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
815 __M);
816}
817
818extern __inline __m256i
819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
820_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
821{
822 return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
823 (__v16hi) __O,
824 __M);
825}
826
827extern __inline __m256i
828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
829_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
830{
831 return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
832 (__v16hi)
fd79b414 833 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
834 __M);
835}
836
837extern __inline __m128i
838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
839_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
840{
841 return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
842 (__v8hi) __O,
843 __M);
844}
845
846extern __inline __m128i
847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
848_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
849{
850 return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
851 (__v8hi)
fd79b414 852 _mm_avx512_setzero_si128 (),
936c0fe4
AI
853 __M);
854}
855
856extern __inline __m128i
857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
858_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
859{
860 return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
861 (__v8hi) __O,
862 __M);
863}
864
865extern __inline __m128i
866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
867_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
868{
869 return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
870 (__v8hi)
fd79b414 871 _mm_avx512_setzero_si128 (),
936c0fe4
AI
872 __M);
873}
874
875extern __inline __m256i
876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
878{
879 return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
880 (__v16hi) __A,
881 (__v16hi)
fd79b414 882 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
883 (__mmask16) -1);
884}
885
886extern __inline __m256i
887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
888_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
889 __m256i __B)
890{
891 return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
892 (__v16hi) __A,
893 (__v16hi)
fd79b414 894 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
895 (__mmask16) __M);
896}
897
898extern __inline __m256i
899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
901 __m256i __B)
902{
903 return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
904 (__v16hi) __A,
905 (__v16hi) __W,
906 (__mmask16) __M);
907}
908
909extern __inline __m128i
910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
912{
913 return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
914 (__v8hi) __A,
915 (__v8hi)
fd79b414 916 _mm_avx512_setzero_si128 (),
936c0fe4
AI
917 (__mmask8) -1);
918}
919
920extern __inline __m128i
921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
922_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
923{
924 return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
925 (__v8hi) __A,
926 (__v8hi)
fd79b414 927 _mm_avx512_setzero_si128 (),
936c0fe4
AI
928 (__mmask8) __M);
929}
930
931extern __inline __m128i
932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
933_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
934 __m128i __B)
935{
936 return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
937 (__v8hi) __A,
938 (__v8hi) __W,
939 (__mmask8) __M);
940}
941
942extern __inline __m256i
943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
944_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B)
945{
946 return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
947 /* idx */ ,
948 (__v16hi) __A,
949 (__v16hi) __B,
c42b0bdf 950 (__mmask16) -1);
936c0fe4
AI
951}
952
953extern __inline __m256i
954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
955_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
956 __m256i __I, __m256i __B)
957{
958 return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
959 /* idx */ ,
960 (__v16hi) __A,
961 (__v16hi) __B,
962 (__mmask16)
963 __U);
964}
965
966extern __inline __m256i
967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
968_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I,
969 __mmask16 __U, __m256i __B)
970{
971 return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
972 (__v16hi) __I
973 /* idx */ ,
974 (__v16hi) __B,
975 (__mmask16)
976 __U);
977}
978
979extern __inline __m256i
980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
981_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
982 __m256i __I, __m256i __B)
983{
984 return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I
985 /* idx */ ,
986 (__v16hi) __A,
987 (__v16hi) __B,
988 (__mmask16)
989 __U);
990}
991
992extern __inline __m128i
993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
994_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B)
995{
996 return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
997 /* idx */ ,
998 (__v8hi) __A,
999 (__v8hi) __B,
c42b0bdf 1000 (__mmask8) -1);
936c0fe4
AI
1001}
1002
1003extern __inline __m128i
1004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1005_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I,
1006 __m128i __B)
1007{
1008 return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
1009 /* idx */ ,
1010 (__v8hi) __A,
1011 (__v8hi) __B,
1012 (__mmask8)
1013 __U);
1014}
1015
1016extern __inline __m128i
1017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1018_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U,
1019 __m128i __B)
1020{
1021 return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
1022 (__v8hi) __I
1023 /* idx */ ,
1024 (__v8hi) __B,
1025 (__mmask8)
1026 __U);
1027}
1028
1029extern __inline __m128i
1030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1031_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
1032 __m128i __B)
1033{
1034 return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I
1035 /* idx */ ,
1036 (__v8hi) __A,
1037 (__v8hi) __B,
1038 (__mmask8)
1039 __U);
1040}
1041
1042extern __inline __m256i
1043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1044_mm256_mask_maddubs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
1045 __m256i __Y)
1046{
1047 return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
1048 (__v32qi) __Y,
1049 (__v16hi) __W,
1050 (__mmask16) __U);
1051}
1052
1053extern __inline __m256i
1054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1055_mm256_maskz_maddubs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
1056{
1057 return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
1058 (__v32qi) __Y,
1059 (__v16hi)
fd79b414 1060 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1061 (__mmask16) __U);
1062}
1063
1064extern __inline __m128i
1065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1066_mm_mask_maddubs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
1067 __m128i __Y)
1068{
1069 return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
1070 (__v16qi) __Y,
1071 (__v8hi) __W,
1072 (__mmask8) __U);
1073}
1074
1075extern __inline __m128i
1076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1077_mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
1078{
1079 return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
1080 (__v16qi) __Y,
1081 (__v8hi)
fd79b414 1082 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1083 (__mmask8) __U);
1084}
1085
1086extern __inline __m256i
1087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1088_mm256_mask_madd_epi16 (__m256i __W, __mmask8 __U, __m256i __A,
1089 __m256i __B)
1090{
1091 return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
1092 (__v16hi) __B,
1093 (__v8si) __W,
1094 (__mmask8) __U);
1095}
1096
1097extern __inline __m256i
1098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1099_mm256_maskz_madd_epi16 (__mmask8 __U, __m256i __A, __m256i __B)
1100{
1101 return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
1102 (__v16hi) __B,
1103 (__v8si)
fd79b414 1104 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1105 (__mmask8) __U);
1106}
1107
1108extern __inline __m128i
1109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1110_mm_mask_madd_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1111 __m128i __B)
1112{
1113 return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
1114 (__v8hi) __B,
1115 (__v4si) __W,
1116 (__mmask8) __U);
1117}
1118
1119extern __inline __m128i
1120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1121_mm_maskz_madd_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
1122{
1123 return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
1124 (__v8hi) __B,
1125 (__v4si)
fd79b414 1126 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1127 (__mmask8) __U);
1128}
1129
1130extern __inline __mmask16
1131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1132_mm_movepi8_mask (__m128i __A)
1133{
1134 return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
1135}
1136
1137extern __inline __mmask32
1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139_mm256_movepi8_mask (__m256i __A)
1140{
1141 return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
1142}
1143
1144extern __inline __mmask8
1145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1146_mm_movepi16_mask (__m128i __A)
1147{
1148 return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
1149}
1150
1151extern __inline __mmask16
1152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1153_mm256_movepi16_mask (__m256i __A)
1154{
1155 return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
1156}
1157
1158extern __inline __m128i
1159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1160_mm_movm_epi8 (__mmask16 __A)
1161{
1162 return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
1163}
1164
1165extern __inline __m256i
1166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1167_mm256_movm_epi8 (__mmask32 __A)
1168{
1169 return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
1170}
1171
1172extern __inline __m128i
1173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1174_mm_movm_epi16 (__mmask8 __A)
1175{
1176 return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
1177}
1178
1179extern __inline __m256i
1180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1181_mm256_movm_epi16 (__mmask16 __A)
1182{
1183 return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
1184}
1185
1186extern __inline __mmask16
1187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1188_mm_test_epi8_mask (__m128i __A, __m128i __B)
1189{
1190 return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
1191 (__v16qi) __B,
1192 (__mmask16) -1);
1193}
1194
1195extern __inline __mmask16
1196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
1198{
1199 return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
1200 (__v16qi) __B, __U);
1201}
1202
1203extern __inline __mmask32
1204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1205_mm256_test_epi8_mask (__m256i __A, __m256i __B)
1206{
1207 return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
1208 (__v32qi) __B,
1209 (__mmask32) -1);
1210}
1211
1212extern __inline __mmask32
1213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1214_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
1215{
1216 return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
1217 (__v32qi) __B, __U);
1218}
1219
1220extern __inline __mmask8
1221__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1222_mm_test_epi16_mask (__m128i __A, __m128i __B)
1223{
1224 return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
1225 (__v8hi) __B,
1226 (__mmask8) -1);
1227}
1228
1229extern __inline __mmask8
1230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
1232{
1233 return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
1234 (__v8hi) __B, __U);
1235}
1236
1237extern __inline __mmask16
1238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1239_mm256_test_epi16_mask (__m256i __A, __m256i __B)
1240{
1241 return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
1242 (__v16hi) __B,
1243 (__mmask16) -1);
1244}
1245
1246extern __inline __mmask16
1247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1248_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
1249{
1250 return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
1251 (__v16hi) __B, __U);
1252}
1253
1254extern __inline __m256i
1255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1256_mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
1257{
1258 return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
1259 (__v16hi) __B,
1260 (__v16hi)
fd79b414 1261 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1262 (__mmask16) __M);
1263}
1264
1265extern __inline __m256i
1266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1267_mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
1268 __m256i __B)
1269{
1270 return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
1271 (__v16hi) __B,
1272 (__v16hi) __W,
1273 (__mmask16) __M);
1274}
1275
1276extern __inline __m128i
1277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1278_mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
1279{
1280 return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
1281 (__v8hi) __B,
1282 (__v8hi)
fd79b414 1283 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1284 (__mmask8) __M);
1285}
1286
1287extern __inline __m128i
1288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1289_mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
1290 __m128i __B)
1291{
1292 return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
1293 (__v8hi) __B,
1294 (__v8hi) __W,
1295 (__mmask8) __M);
1296}
1297
1298extern __inline __m256i
1299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1300_mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
1301{
1302 return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
1303 (__v16hi) __B,
1304 (__v16hi)
fd79b414 1305 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1306 (__mmask16) __M);
1307}
1308
1309extern __inline __m256i
1310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1311_mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
1312 __m256i __B)
1313{
1314 return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
1315 (__v16hi) __B,
1316 (__v16hi) __W,
1317 (__mmask16) __M);
1318}
1319
1320extern __inline __m256i
1321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1322_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1323{
1324 return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
1325 (__v32qi) __B,
1326 (__v32qi)
fd79b414 1327 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1328 (__mmask32) __M);
1329}
1330
1331extern __inline __m256i
1332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333_mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
1334 __m256i __B)
1335{
1336 return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
1337 (__v32qi) __B,
1338 (__v32qi) __W,
1339 (__mmask32) __M);
1340}
1341
1342extern __inline __m128i
1343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1344_mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
1345{
1346 return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
1347 (__v16qi) __B,
1348 (__v16qi)
fd79b414 1349 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1350 (__mmask16) __M);
1351}
1352
1353extern __inline __m128i
1354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355_mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
1356 __m128i __B)
1357{
1358 return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
1359 (__v16qi) __B,
1360 (__v16qi) __W,
1361 (__mmask16) __M);
1362}
1363
1364extern __inline __m256i
1365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366_mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
1367{
1368 return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
1369 (__v32qi) __B,
1370 (__v32qi)
fd79b414 1371 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1372 (__mmask32) __M);
1373}
1374
1375extern __inline __m256i
1376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1377_mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
1378 __m256i __B)
1379{
1380 return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
1381 (__v32qi) __B,
1382 (__v32qi) __W,
1383 (__mmask32) __M);
1384}
1385
1386extern __inline __m128i
1387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388_mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
1389{
1390 return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
1391 (__v16qi) __B,
1392 (__v16qi)
fd79b414 1393 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1394 (__mmask16) __M);
1395}
1396
1397extern __inline __m128i
1398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1399_mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
1400 __m128i __B)
1401{
1402 return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
1403 (__v16qi) __B,
1404 (__v16qi) __W,
1405 (__mmask16) __M);
1406}
1407
1408extern __inline __m256i
1409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1410_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
1411{
1412 return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
1413 (__v32qi) __B,
1414 (__v32qi)
fd79b414 1415 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1416 (__mmask32) __M);
1417}
1418
1419extern __inline __m256i
1420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1421_mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
1422 __m256i __B)
1423{
1424 return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
1425 (__v32qi) __B,
1426 (__v32qi) __W,
1427 (__mmask32) __M);
1428}
1429
1430extern __inline __m128i
1431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1432_mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
1433{
1434 return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
1435 (__v16qi) __B,
1436 (__v16qi)
fd79b414 1437 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1438 (__mmask16) __M);
1439}
1440
1441extern __inline __m128i
1442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1443_mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
1444 __m128i __B)
1445{
1446 return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
1447 (__v16qi) __B,
1448 (__v16qi) __W,
1449 (__mmask16) __M);
1450}
1451
1452extern __inline __m256i
1453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1454_mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
1455{
1456 return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
1457 (__v32qi) __B,
1458 (__v32qi)
fd79b414 1459 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1460 (__mmask32) __M);
1461}
1462
1463extern __inline __m256i
1464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465_mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
1466 __m256i __B)
1467{
1468 return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
1469 (__v32qi) __B,
1470 (__v32qi) __W,
1471 (__mmask32) __M);
1472}
1473
1474extern __inline __m128i
1475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1476_mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
1477{
1478 return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
1479 (__v16qi) __B,
1480 (__v16qi)
fd79b414 1481 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1482 (__mmask16) __M);
1483}
1484
1485extern __inline __m128i
1486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1487_mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
1488 __m128i __B)
1489{
1490 return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
1491 (__v16qi) __B,
1492 (__v16qi) __W,
1493 (__mmask16) __M);
1494}
1495
1496extern __inline __m256i
1497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1498_mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
1499{
1500 return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
1501 (__v16hi) __B,
1502 (__v16hi)
fd79b414 1503 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1504 (__mmask16) __M);
1505}
1506
1507extern __inline __m256i
1508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1509_mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
1510 __m256i __B)
1511{
1512 return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
1513 (__v16hi) __B,
1514 (__v16hi) __W,
1515 (__mmask16) __M);
1516}
1517
1518extern __inline __m128i
1519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1520_mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
1521{
1522 return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
1523 (__v8hi) __B,
1524 (__v8hi)
fd79b414 1525 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1526 (__mmask8) __M);
1527}
1528
1529extern __inline __m128i
1530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1531_mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
1532 __m128i __B)
1533{
1534 return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
1535 (__v8hi) __B,
1536 (__v8hi) __W,
1537 (__mmask8) __M);
1538}
1539
1540extern __inline __m256i
1541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1542_mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
1543{
1544 return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
1545 (__v16hi) __B,
1546 (__v16hi)
fd79b414 1547 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1548 (__mmask16) __M);
1549}
1550
1551extern __inline __m256i
1552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553_mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
1554 __m256i __B)
1555{
1556 return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
1557 (__v16hi) __B,
1558 (__v16hi) __W,
1559 (__mmask16) __M);
1560}
1561
1562extern __inline __m128i
1563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1564_mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
1565{
1566 return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
1567 (__v8hi) __B,
1568 (__v8hi)
fd79b414 1569 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1570 (__mmask8) __M);
1571}
1572
1573extern __inline __m128i
1574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1575_mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
1576 __m128i __B)
1577{
1578 return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
1579 (__v8hi) __B,
1580 (__v8hi) __W,
1581 (__mmask8) __M);
1582}
1583
1584extern __inline __m128i
1585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1586_mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
1587{
1588 return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
1589 (__v8hi) __B,
1590 (__v8hi)
fd79b414 1591 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1592 (__mmask8) __M);
1593}
1594
1595extern __inline __m128i
1596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1597_mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
1598 __m128i __B)
1599{
1600 return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
1601 (__v8hi) __B,
1602 (__v8hi) __W,
1603 (__mmask8) __M);
1604}
1605
1606#ifdef __OPTIMIZE__
1607extern __inline __m256i
1608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1609_mm256_mask_alignr_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
1610 __m256i __B, const int __N)
1611{
1612 return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
1613 (__v4di) __B,
1614 __N * 8,
1615 (__v4di) __W,
1616 (__mmask32) __U);
1617}
1618
1619extern __inline __m256i
1620__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1621_mm256_maskz_alignr_epi8 (__mmask32 __U, __m256i __A, __m256i __B,
1622 const int __N)
1623{
1624 return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
1625 (__v4di) __B,
1626 __N * 8,
1627 (__v4di)
fd79b414 1628 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1629 (__mmask32) __U);
1630}
1631
1632extern __inline __m128i
1633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634_mm_mask_alignr_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
1635 __m128i __B, const int __N)
1636{
1637 return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
1638 (__v2di) __B,
1639 __N * 8,
1640 (__v2di) __W,
1641 (__mmask16) __U);
1642}
1643
1644extern __inline __m128i
1645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1646_mm_maskz_alignr_epi8 (__mmask16 __U, __m128i __A, __m128i __B,
1647 const int __N)
1648{
1649 return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
1650 (__v2di) __B,
1651 __N * 8,
1652 (__v2di)
fd79b414 1653 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1654 (__mmask16) __U);
1655}
1656
1657extern __inline __m256i
1658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1659_mm256_dbsad_epu8 (__m256i __A, __m256i __B, const int __imm)
1660{
1661 return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1662 (__v32qi) __B,
1663 __imm,
1664 (__v16hi)
fd79b414 1665 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1666 (__mmask16) -1);
1667}
1668
1669extern __inline __m256i
1670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1671_mm256_mask_dbsad_epu8 (__m256i __W, __mmask16 __U, __m256i __A,
1672 __m256i __B, const int __imm)
1673{
1674 return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1675 (__v32qi) __B,
1676 __imm,
1677 (__v16hi) __W,
1678 (__mmask16) __U);
1679}
1680
1681extern __inline __m256i
1682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1683_mm256_maskz_dbsad_epu8 (__mmask16 __U, __m256i __A, __m256i __B,
1684 const int __imm)
1685{
1686 return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
1687 (__v32qi) __B,
1688 __imm,
1689 (__v16hi)
fd79b414 1690 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1691 (__mmask16) __U);
1692}
1693
1694extern __inline __m128i
1695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1696_mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm)
1697{
1698 return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1699 (__v16qi) __B,
1700 __imm,
1701 (__v8hi)
fd79b414 1702 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1703 (__mmask8) -1);
1704}
1705
1706extern __inline __m128i
1707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1708_mm_mask_dbsad_epu8 (__m128i __W, __mmask8 __U, __m128i __A,
1709 __m128i __B, const int __imm)
1710{
1711 return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1712 (__v16qi) __B,
1713 __imm,
1714 (__v8hi) __W,
1715 (__mmask8) __U);
1716}
1717
1718extern __inline __m128i
1719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720_mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
1721 const int __imm)
1722{
1723 return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
1724 (__v16qi) __B,
1725 __imm,
1726 (__v8hi)
fd79b414 1727 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1728 (__mmask8) __U);
1729}
1730
936c0fe4
AI
1731extern __inline __mmask8
1732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1733_mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1734 const int __P)
1735{
1736 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
1737 (__v8hi) __Y, __P,
1738 (__mmask8) __U);
1739}
1740
1741extern __inline __mmask8
1742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743_mm_cmp_epi16_mask (__m128i __X, __m128i __Y, const int __P)
1744{
1745 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
1746 (__v8hi) __Y, __P,
1747 (__mmask8) -1);
1748}
1749
1750extern __inline __mmask16
1751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752_mm256_mask_cmp_epi16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1753 const int __P)
1754{
1755 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
1756 (__v16hi) __Y, __P,
1757 (__mmask16) __U);
1758}
1759
1760extern __inline __mmask16
1761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762_mm256_cmp_epi16_mask (__m256i __X, __m256i __Y, const int __P)
1763{
1764 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
1765 (__v16hi) __Y, __P,
1766 (__mmask16) -1);
1767}
1768
1769extern __inline __mmask16
1770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1771_mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
936c0fe4
AI
1772 const int __P)
1773{
1774 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
1775 (__v16qi) __Y, __P,
1776 (__mmask16) __U);
1777}
1778
1779extern __inline __mmask16
1780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1781_mm_cmp_epi8_mask (__m128i __X, __m128i __Y, const int __P)
1782{
1783 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
1784 (__v16qi) __Y, __P,
1785 (__mmask16) -1);
1786}
1787
1788extern __inline __mmask32
1789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1790_mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
936c0fe4
AI
1791 const int __P)
1792{
1793 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
1794 (__v32qi) __Y, __P,
1795 (__mmask32) __U);
1796}
1797
18379eea 1798extern __inline __mmask32
936c0fe4
AI
1799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1800_mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
1801{
1802 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
1803 (__v32qi) __Y, __P,
1804 (__mmask32) -1);
1805}
1806
1807extern __inline __mmask8
1808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1809_mm_mask_cmp_epu16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
1810 const int __P)
1811{
1812 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
1813 (__v8hi) __Y, __P,
1814 (__mmask8) __U);
1815}
1816
1817extern __inline __mmask8
1818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1819_mm_cmp_epu16_mask (__m128i __X, __m128i __Y, const int __P)
1820{
1821 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
1822 (__v8hi) __Y, __P,
1823 (__mmask8) -1);
1824}
1825
1826extern __inline __mmask16
1827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1828_mm256_mask_cmp_epu16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
1829 const int __P)
1830{
1831 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
1832 (__v16hi) __Y, __P,
1833 (__mmask16) __U);
1834}
1835
1836extern __inline __mmask16
1837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1838_mm256_cmp_epu16_mask (__m256i __X, __m256i __Y, const int __P)
1839{
1840 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
1841 (__v16hi) __Y, __P,
1842 (__mmask16) -1);
1843}
1844
1845extern __inline __mmask16
1846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1847_mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
936c0fe4
AI
1848 const int __P)
1849{
1850 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
1851 (__v16qi) __Y, __P,
1852 (__mmask16) __U);
1853}
1854
1855extern __inline __mmask16
1856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1857_mm_cmp_epu8_mask (__m128i __X, __m128i __Y, const int __P)
1858{
1859 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
1860 (__v16qi) __Y, __P,
1861 (__mmask16) -1);
1862}
1863
1864extern __inline __mmask32
1865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 1866_mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
936c0fe4
AI
1867 const int __P)
1868{
1869 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
1870 (__v32qi) __Y, __P,
1871 (__mmask32) __U);
1872}
1873
18379eea 1874extern __inline __mmask32
936c0fe4
AI
1875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1876_mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
1877{
1878 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
1879 (__v32qi) __Y, __P,
1880 (__mmask32) -1);
1881}
1882
1883extern __inline __m256i
1884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885_mm256_mask_srli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1886 const int __imm)
1887{
1888 return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
1889 (__v16hi) __W,
1890 (__mmask16) __U);
1891}
1892
1893extern __inline __m256i
1894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1895_mm256_maskz_srli_epi16 (__mmask16 __U, __m256i __A, const int __imm)
1896{
1897 return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
1898 (__v16hi)
fd79b414 1899 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1900 (__mmask16) __U);
1901}
1902
1903extern __inline __m128i
1904__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1905_mm_mask_srli_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1906 const int __imm)
1907{
1908 return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
1909 (__v8hi) __W,
1910 (__mmask8) __U);
1911}
1912
1913extern __inline __m128i
1914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1915_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1916{
1917 return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
1918 (__v8hi)
fd79b414 1919 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1920 (__mmask8) __U);
1921}
1922
1923extern __inline __m256i
1924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1925_mm256_mask_shufflehi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1926 const int __imm)
1927{
1928 return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
1929 __imm,
1930 (__v16hi) __W,
1931 (__mmask16) __U);
1932}
1933
1934extern __inline __m256i
1935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1936_mm256_maskz_shufflehi_epi16 (__mmask16 __U, __m256i __A,
1937 const int __imm)
1938{
1939 return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
1940 __imm,
1941 (__v16hi)
fd79b414 1942 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1943 (__mmask16) __U);
1944}
1945
1946extern __inline __m128i
1947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1948_mm_mask_shufflehi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1949 const int __imm)
1950{
1951 return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
1952 (__v8hi) __W,
1953 (__mmask8) __U);
1954}
1955
1956extern __inline __m128i
1957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1958_mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm)
1959{
1960 return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
1961 (__v8hi)
fd79b414 1962 _mm_avx512_setzero_si128 (),
936c0fe4
AI
1963 (__mmask8) __U);
1964}
1965
1966extern __inline __m256i
1967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1968_mm256_mask_shufflelo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
1969 const int __imm)
1970{
1971 return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
1972 __imm,
1973 (__v16hi) __W,
1974 (__mmask16) __U);
1975}
1976
1977extern __inline __m256i
1978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1979_mm256_maskz_shufflelo_epi16 (__mmask16 __U, __m256i __A,
1980 const int __imm)
1981{
1982 return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
1983 __imm,
1984 (__v16hi)
fd79b414 1985 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
1986 (__mmask16) __U);
1987}
1988
1989extern __inline __m128i
1990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1991_mm_mask_shufflelo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
1992 const int __imm)
1993{
1994 return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
1995 (__v8hi) __W,
1996 (__mmask8) __U);
1997}
1998
1999extern __inline __m128i
2000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001_mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm)
2002{
2003 return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
2004 (__v8hi)
fd79b414 2005 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2006 (__mmask8) __U);
2007}
2008
2009extern __inline __m256i
2010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011_mm256_mask_srai_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4a84a2db 2012 const unsigned int __imm)
936c0fe4
AI
2013{
2014 return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
2015 (__v16hi) __W,
2016 (__mmask16) __U);
2017}
2018
2019extern __inline __m256i
2020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4a84a2db 2021_mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const unsigned int __imm)
936c0fe4
AI
2022{
2023 return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
2024 (__v16hi)
fd79b414 2025 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2026 (__mmask16) __U);
2027}
2028
2029extern __inline __m128i
2030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2031_mm_mask_srai_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4a84a2db 2032 const unsigned int __imm)
936c0fe4
AI
2033{
2034 return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
2035 (__v8hi) __W,
2036 (__mmask8) __U);
2037}
2038
2039extern __inline __m128i
2040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4a84a2db 2041_mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const unsigned int __imm)
936c0fe4
AI
2042{
2043 return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
2044 (__v8hi)
fd79b414 2045 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2046 (__mmask8) __U);
2047}
2048
2049extern __inline __m256i
2050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2051_mm256_mask_slli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4a84a2db 2052 unsigned int __B)
936c0fe4
AI
2053{
2054 return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
2055 (__v16hi) __W,
2056 (__mmask16) __U);
2057}
2058
2059extern __inline __m256i
2060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4a84a2db 2061_mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, unsigned int __B)
936c0fe4
AI
2062{
2063 return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
2064 (__v16hi)
fd79b414 2065 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2066 (__mmask16) __U);
2067}
2068
2069extern __inline __m128i
2070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4a84a2db 2071_mm_mask_slli_epi16 (__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
936c0fe4
AI
2072{
2073 return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
2074 (__v8hi) __W,
2075 (__mmask8) __U);
2076}
2077
2078extern __inline __m128i
2079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4a84a2db 2080_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
936c0fe4
AI
2081{
2082 return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
2083 (__v8hi)
fd79b414 2084 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2085 (__mmask8) __U);
2086}
2087
2088#else
/* Macro form of _mm256_mask_alignr_epi8.  Expands to one call of
   __builtin_ia32_palignr256_mask with operands X, Y and the immediate
   (N) * 8, followed by W and the mask U.  W is the operand placed just
   before the mask, as in the other _mask_ macros of this file; X is not
   repeated there, so the caller's W argument is used.  */
#define _mm256_mask_alignr_epi8(W, U, X, Y, N) \
  ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), (int)((N) * 8), \
     (__v4di)(__m256i)(W), (__mmask32)(U)))
2093
/* Macro form: expands to one call of __builtin_ia32_psrlwi256_mask with
   operands A, B, W and the mask U.  */
#define _mm256_mask_srli_epi16(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlwi256_mask ( \
     (__v16hi)(__m256i)(A), \
     (int)(B), \
     (__v16hi)(__m256i)(W), \
     (__mmask16)(U)))
2097
/* Macro form: expands to one call of __builtin_ia32_psrlwi256_mask with
   operands A, B, the result of _mm256_avx512_setzero_si256 () and the
   mask U.  */
#define _mm256_maskz_srli_epi16(U, A, B) \
  ((__m256i) __builtin_ia32_psrlwi256_mask ( \
     (__v16hi)(__m256i)(A), \
     (int)(B), \
     (__v16hi)_mm256_avx512_setzero_si256 (), \
     (__mmask16)(U)))
936c0fe4
AI
2101
/* Macro form: expands to one call of __builtin_ia32_psrlwi128_mask with
   operands A, B, W and the mask U.  */
#define _mm_mask_srli_epi16(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlwi128_mask ( \
     (__v8hi)(__m128i)(A), \
     (int)(B), \
     (__v8hi)(__m128i)(W), \
     (__mmask8)(U)))
2105
/* Macro form: expands to one call of __builtin_ia32_psrlwi128_mask with
   operands A, B, the result of _mm_avx512_setzero_si128 () and the
   mask U.  */
#define _mm_maskz_srli_epi16(U, A, B) \
  ((__m128i) __builtin_ia32_psrlwi128_mask ( \
     (__v8hi)(__m128i)(A), \
     (int)(B), \
     (__v8hi)_mm_avx512_setzero_si128 (), \
     (__mmask8)(U)))
936c0fe4
AI
2109
/* Macro form: expands to one call of __builtin_ia32_psrawi256_mask with
   operands A, B (cast to unsigned int), W and the mask U.  */
#define _mm256_mask_srai_epi16(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrawi256_mask ( \
     (__v16hi)(__m256i)(A), \
     (unsigned int)(B), \
     (__v16hi)(__m256i)(W), \
     (__mmask16)(U)))
936c0fe4
AI
2113
/* Macro form: expands to one call of __builtin_ia32_psrawi256_mask with
   operands A, B (cast to unsigned int), the result of
   _mm256_avx512_setzero_si256 () and the mask U.  */
#define _mm256_maskz_srai_epi16(U, A, B) \
  ((__m256i) __builtin_ia32_psrawi256_mask ( \
     (__v16hi)(__m256i)(A), \
     (unsigned int)(B), \
     (__v16hi)_mm256_avx512_setzero_si256 (), \
     (__mmask16)(U)))
936c0fe4
AI
2117
/* Macro form: expands to one call of __builtin_ia32_psrawi128_mask with
   operands A, B (cast to unsigned int), W and the mask U.  */
#define _mm_mask_srai_epi16(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrawi128_mask ( \
     (__v8hi)(__m128i)(A), \
     (unsigned int)(B), \
     (__v8hi)(__m128i)(W), \
     (__mmask8)(U)))
936c0fe4
AI
2121
/* Macro form: expands to one call of __builtin_ia32_psrawi128_mask with
   operands A, B (cast to unsigned int), the result of
   _mm_avx512_setzero_si128 () and the mask U.  */
#define _mm_maskz_srai_epi16(U, A, B) \
  ((__m128i) __builtin_ia32_psrawi128_mask ( \
     (__v8hi)(__m128i)(A), \
     (unsigned int)(B), \
     (__v8hi)_mm_avx512_setzero_si128(), \
     (__mmask8)(U)))
936c0fe4
AI
2125
/* Macro form: expands to one call of __builtin_ia32_pshufhw256_mask with
   operands A, B, W and the mask U.  */
#define _mm256_mask_shufflehi_epi16(W, U, A, B) \
  ((__m256i) __builtin_ia32_pshufhw256_mask ( \
     (__v16hi)(__m256i)(A), \
     (int)(B), \
     (__v16hi)(__m256i)(W), (__mmask16)(U)))
2130
/* Macro form: expands to one call of __builtin_ia32_pshufhw256_mask with
   operands A, B, the result of _mm256_avx512_setzero_si256 () and the
   mask U.  */
#define _mm256_maskz_shufflehi_epi16(U, A, B) \
  ((__m256i) __builtin_ia32_pshufhw256_mask ( \
     (__v16hi)(__m256i)(A), \
     (int)(B), \
     (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), (__mmask16)(U)))
2135
/* Macro form: expands to one call of __builtin_ia32_pshufhw128_mask with
   operands A, B, W and the mask U.  */
#define _mm_mask_shufflehi_epi16(W, U, A, B) \
  ((__m128i) __builtin_ia32_pshufhw128_mask ( \
     (__v8hi)(__m128i)(A), \
     (int)(B), \
     (__v8hi)(__m128i)(W), (__mmask8)(U)))
2140
/* Macro form: expands to one call of __builtin_ia32_pshufhw128_mask with
   operands A, B, the result of _mm_avx512_setzero_si128 () and the
   mask U.  */
#define _mm_maskz_shufflehi_epi16(U, A, B) \
  ((__m128i) __builtin_ia32_pshufhw128_mask ( \
     (__v8hi)(__m128i)(A), \
     (int)(B), \
     (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), (__mmask8)(U)))
2145
/* Macro form: expands to one call of __builtin_ia32_pshuflw256_mask with
   operands A, B, W and the mask U.  */
#define _mm256_mask_shufflelo_epi16(W, U, A, B) \
  ((__m256i) __builtin_ia32_pshuflw256_mask ( \
     (__v16hi)(__m256i)(A), \
     (int)(B), \
     (__v16hi)(__m256i)(W), (__mmask16)(U)))
2150
/* Macro form: expands to one call of __builtin_ia32_pshuflw256_mask with
   operands A, B, the result of _mm256_avx512_setzero_si256 () and the
   mask U.  */
#define _mm256_maskz_shufflelo_epi16(U, A, B) \
  ((__m256i) __builtin_ia32_pshuflw256_mask ( \
     (__v16hi)(__m256i)(A), \
     (int)(B), \
     (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), (__mmask16)(U)))
2155
/* Macro form: expands to one call of __builtin_ia32_pshuflw128_mask with
   operands A, B, W and the mask U.  */
#define _mm_mask_shufflelo_epi16(W, U, A, B) \
  ((__m128i) __builtin_ia32_pshuflw128_mask ( \
     (__v8hi)(__m128i)(A), \
     (int)(B), \
     (__v8hi)(__m128i)(W), (__mmask8)(U)))
2160
/* Macro form: expands to one call of __builtin_ia32_pshuflw128_mask with
   operands A, B, the result of _mm_avx512_setzero_si128 () and the
   mask U.  */
#define _mm_maskz_shufflelo_epi16(U, A, B) \
  ((__m128i) __builtin_ia32_pshuflw128_mask ( \
     (__v8hi)(__m128i)(A), \
     (int)(B), \
     (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), (__mmask8)(U)))
2165
/* Macro form: expands to one call of __builtin_ia32_palignr256_mask with
   operands X, Y, the immediate (N) * 8, the result of
   _mm256_avx512_setzero_si256 () and the mask U.  */
#define _mm256_maskz_alignr_epi8(U, X, Y, N) \
  ((__m256i) __builtin_ia32_palignr256_mask ( \
     (__v4di)(__m256i)(X), \
     (__v4di)(__m256i)(Y), \
     (int)((N) * 8), \
     (__v4di)(__m256i)_mm256_avx512_setzero_si256 (), (__mmask32)(U)))
2171
/* Macro form of _mm_mask_alignr_epi8.  Expands to one call of
   __builtin_ia32_palignr128_mask with operands X, Y and the immediate
   (N) * 8, followed by W and the mask U.  W is the operand placed just
   before the mask, as in the other _mask_ macros of this file; X is not
   repeated there, so the caller's W argument is used.  */
#define _mm_mask_alignr_epi8(W, U, X, Y, N) \
  ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), (int)((N) * 8), \
     (__v2di)(__m128i)(W), (__mmask16)(U)))
2176
/* Macro form: expands to one call of __builtin_ia32_palignr128_mask with
   operands X, Y, the immediate (N) * 8, the result of
   _mm_avx512_setzero_si128 () and the mask U.  */
#define _mm_maskz_alignr_epi8(U, X, Y, N) \
  ((__m128i) __builtin_ia32_palignr128_mask ( \
     (__v2di)(__m128i)(X), \
     (__v2di)(__m128i)(Y), \
     (int)((N) * 8), \
     (__v2di)(__m128i)_mm_avx512_setzero_si128 (), (__mmask16)(U)))
2182
4a84a2db
HL
/* Macro form: expands to one call of __builtin_ia32_psllwi128_mask with
   operands X, C (cast to unsigned int), W and the mask U.  */
#define _mm_mask_slli_epi16(W, U, X, C) \
  ((__m128i)__builtin_ia32_psllwi128_mask ( \
     (__v8hi)(__m128i)(X), (unsigned int)(C), \
     (__v8hi)(__m128i)(W), (__mmask8)(U)))
2188
4a84a2db
HL
/* Macro form: expands to one call of __builtin_ia32_psllwi128_mask with
   operands X, C (cast to unsigned int), the result of
   _mm_avx512_setzero_si128 () and the mask U.  */
#define _mm_maskz_slli_epi16(U, X, C) \
  ((__m128i)__builtin_ia32_psllwi128_mask ( \
     (__v8hi)(__m128i)(X), (unsigned int)(C), \
     (__v8hi)(__m128i)_mm_avx512_setzero_si128 (), (__mmask8)(U)))
2194
/* Macro form: expands to one call of __builtin_ia32_dbpsadbw256_mask with
   operands X, Y, C, the result of _mm256_avx512_setzero_si256 () and an
   all-ones (__mmask16)-1 mask.  */
#define _mm256_dbsad_epu8(X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ( \
     (__v32qi)(__m256i) (X), \
     (__v32qi)(__m256i) (Y), \
     (int) (C), \
     (__v16hi)(__m256i)_mm256_avx512_setzero_si256(), (__mmask16)-1))
2200
4a84a2db
HL
/* Macro form: expands to one call of __builtin_ia32_psllwi256_mask with
   operands X, C (cast to unsigned int), W and the mask U.  */
#define _mm256_mask_slli_epi16(W, U, X, C) \
  ((__m256i)__builtin_ia32_psllwi256_mask ( \
     (__v16hi)(__m256i)(X), (unsigned int)(C), \
     (__v16hi)(__m256i)(W), (__mmask16)(U)))
2206
4a84a2db
HL
/* Macro form: expands to one call of __builtin_ia32_psllwi256_mask with
   operands X, C (cast to unsigned int), the result of
   _mm256_avx512_setzero_si256 () and the mask U.  */
#define _mm256_maskz_slli_epi16(U, X, C) \
  ((__m256i)__builtin_ia32_psllwi256_mask ( \
     (__v16hi)(__m256i)(X), (unsigned int)(C), \
     (__v16hi)(__m256i)_mm256_avx512_setzero_si256 (), (__mmask16)(U)))
2212
/* Macro form: expands to one call of __builtin_ia32_dbpsadbw256_mask with
   operands X, Y, C, W and the mask U.  */
#define _mm256_mask_dbsad_epu8(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ( \
     (__v32qi)(__m256i) (X), \
     (__v32qi)(__m256i) (Y), \
     (int) (C), \
     (__v16hi)(__m256i)(W), (__mmask16)(U)))
2218
/* Macro form: expands to one call of __builtin_ia32_dbpsadbw256_mask with
   operands X, Y, C, the result of _mm256_avx512_setzero_si256 () and the
   mask U.  */
#define _mm256_maskz_dbsad_epu8(U, X, Y, C) \
  ((__m256i) __builtin_ia32_dbpsadbw256_mask ( \
     (__v32qi)(__m256i) (X), \
     (__v32qi)(__m256i) (Y), \
     (int) (C), \
     (__v16hi)(__m256i)_mm256_avx512_setzero_si256(), (__mmask16)(U)))
2224
/* Macro form: expands to one call of __builtin_ia32_dbpsadbw128_mask with
   operands X, Y, C, the result of _mm_avx512_setzero_si128 () and an
   all-ones (__mmask8)-1 mask.  */
#define _mm_dbsad_epu8(X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ( \
     (__v16qi)(__m128i) (X), \
     (__v16qi)(__m128i) (Y), \
     (int) (C), \
     (__v8hi)(__m128i)_mm_avx512_setzero_si128(), (__mmask8)-1))
2230
/* Macro form: expands to one call of __builtin_ia32_dbpsadbw128_mask with
   operands X, Y, C, W and the mask U.  */
#define _mm_mask_dbsad_epu8(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ( \
     (__v16qi)(__m128i) (X), \
     (__v16qi)(__m128i) (Y), \
     (int) (C), \
     (__v8hi)(__m128i)(W), (__mmask8)(U)))
2236
/* Macro form: expands to one call of __builtin_ia32_dbpsadbw128_mask with
   operands X, Y, C, the result of _mm_avx512_setzero_si128 () and the
   mask U.  */
#define _mm_maskz_dbsad_epu8(U, X, Y, C) \
  ((__m128i) __builtin_ia32_dbpsadbw128_mask ( \
     (__v16qi)(__m128i) (X), \
     (__v16qi)(__m128i) (Y), \
     (int) (C), \
     (__v8hi)(__m128i)_mm_avx512_setzero_si128(), (__mmask8)(U)))
2242
936c0fe4
AI
/* Macro form: expands to one call of __builtin_ia32_cmpw128_mask with
   operands X, Y, the predicate P and an all-ones (__mmask8)(-1) mask.  */
#define _mm_cmp_epi16_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(-1)))
2247
/* Macro form: expands to one call of __builtin_ia32_cmpb128_mask with
   operands X, Y, the predicate P and an all-ones (__mmask16)(-1) mask.  */
#define _mm_cmp_epi8_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
2252
/* Macro form: expands to one call of __builtin_ia32_cmpw256_mask with
   operands X, Y, the predicate P and an all-ones (__mmask16)(-1) mask.  */
#define _mm256_cmp_epi16_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
2257
/* Macro form: expands to one call of __builtin_ia32_cmpb256_mask with
   operands X, Y, the predicate P and an all-ones (__mmask32)(-1) mask.  */
#define _mm256_cmp_epi8_mask(X, Y, P) \
  ((__mmask32) __builtin_ia32_cmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)(-1)))
2262
/* Macro form: expands to one call of __builtin_ia32_ucmpw128_mask with
   operands X, Y, the predicate P and an all-ones (__mmask8)(-1) mask.  */
#define _mm_cmp_epu16_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(-1)))
2267
/* Macro form: expands to one call of __builtin_ia32_ucmpb128_mask with
   operands X, Y, the predicate P and an all-ones (__mmask16)(-1) mask.  */
#define _mm_cmp_epu8_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
2272
/* Macro form: expands to one call of __builtin_ia32_ucmpw256_mask with
   operands X, Y, the predicate P and an all-ones (__mmask16)(-1) mask.  */
#define _mm256_cmp_epu16_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(-1)))
2277
/* Macro form: expands to one call of __builtin_ia32_ucmpb256_mask with
   operands X, Y, the predicate P and an all-ones (__mmask32)-1 mask.  */
#define _mm256_cmp_epu8_mask(X, Y, P) \
  ((__mmask32) __builtin_ia32_ucmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)-1))
2282
/* Macro form: expands to one call of __builtin_ia32_cmpw128_mask with
   operands X, Y, the predicate P and the mask M.  */
#define _mm_mask_cmp_epi16_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))
936c0fe4
AI
2287
/* Macro form: expands to one call of __builtin_ia32_cmpb128_mask with
   operands X, Y, the predicate P and the mask M.  */
#define _mm_mask_cmp_epi8_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
2292
/* Macro form: expands to one call of __builtin_ia32_cmpw256_mask with
   operands X, Y, the predicate P and the mask M.  */
#define _mm256_mask_cmp_epi16_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
2297
/* Macro form: expands to one call of __builtin_ia32_cmpb256_mask with
   operands X, Y, the predicate P and the mask M.  */
#define _mm256_mask_cmp_epi8_mask(M, X, Y, P) \
  ((__mmask32) __builtin_ia32_cmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)(M)))
2302
/* Macro form: expands to one call of __builtin_ia32_ucmpw128_mask with
   operands X, Y, the predicate P and the mask M.  */
#define _mm_mask_cmp_epu16_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpw128_mask ( \
     (__v8hi)(__m128i)(X), \
     (__v8hi)(__m128i)(Y), \
     (int)(P), \
     (__mmask8)(M)))
2307
/* Macro form: expands to one call of __builtin_ia32_ucmpb128_mask with
   operands X, Y, the predicate P and the mask M.  */
#define _mm_mask_cmp_epu8_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpb128_mask ( \
     (__v16qi)(__m128i)(X), \
     (__v16qi)(__m128i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
2312
/* Macro form: expands to one call of __builtin_ia32_ucmpw256_mask with
   operands X, Y, the predicate P and the mask M.  */
#define _mm256_mask_cmp_epu16_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_ucmpw256_mask ( \
     (__v16hi)(__m256i)(X), \
     (__v16hi)(__m256i)(Y), \
     (int)(P), \
     (__mmask16)(M)))
2317
/* Macro form: expands to one call of __builtin_ia32_ucmpb256_mask with
   operands X, Y, the predicate P and the mask M.  */
#define _mm256_mask_cmp_epu8_mask(M, X, Y, P) \
  ((__mmask32) __builtin_ia32_ucmpb256_mask ( \
     (__v32qi)(__m256i)(X), \
     (__v32qi)(__m256i)(Y), \
     (int)(P), \
     (__mmask32)(M)))
936c0fe4
AI
2322#endif
2323
2324extern __inline __mmask32
2325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2326_mm256_cmpneq_epi8_mask (__m256i __X, __m256i __Y)
2327{
2328 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2329 (__v32qi) __Y, 4,
c42b0bdf 2330 (__mmask32) -1);
936c0fe4
AI
2331}
2332
2333extern __inline __mmask32
2334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2335_mm256_cmplt_epi8_mask (__m256i __X, __m256i __Y)
2336{
2337 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2338 (__v32qi) __Y, 1,
c42b0bdf 2339 (__mmask32) -1);
936c0fe4
AI
2340}
2341
2342extern __inline __mmask32
2343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2344_mm256_cmpge_epi8_mask (__m256i __X, __m256i __Y)
2345{
2346 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2347 (__v32qi) __Y, 5,
c42b0bdf 2348 (__mmask32) -1);
936c0fe4
AI
2349}
2350
2351extern __inline __mmask32
2352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2353_mm256_cmple_epi8_mask (__m256i __X, __m256i __Y)
2354{
2355 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
2356 (__v32qi) __Y, 2,
c42b0bdf 2357 (__mmask32) -1);
936c0fe4
AI
2358}
2359
2360extern __inline __mmask16
2361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2362_mm256_cmpneq_epi16_mask (__m256i __X, __m256i __Y)
2363{
2364 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2365 (__v16hi) __Y, 4,
c42b0bdf 2366 (__mmask16) -1);
936c0fe4
AI
2367}
2368
2369extern __inline __mmask16
2370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2371_mm256_cmplt_epi16_mask (__m256i __X, __m256i __Y)
2372{
2373 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2374 (__v16hi) __Y, 1,
c42b0bdf 2375 (__mmask16) -1);
936c0fe4
AI
2376}
2377
2378extern __inline __mmask16
2379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2380_mm256_cmpge_epi16_mask (__m256i __X, __m256i __Y)
2381{
2382 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2383 (__v16hi) __Y, 5,
c42b0bdf 2384 (__mmask16) -1);
936c0fe4
AI
2385}
2386
2387extern __inline __mmask16
2388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389_mm256_cmple_epi16_mask (__m256i __X, __m256i __Y)
2390{
2391 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
2392 (__v16hi) __Y, 2,
c42b0bdf 2393 (__mmask16) -1);
936c0fe4
AI
2394}
2395
2396extern __inline __mmask16
2397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2398_mm_cmpneq_epu8_mask (__m128i __X, __m128i __Y)
2399{
2400 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2401 (__v16qi) __Y, 4,
c42b0bdf 2402 (__mmask16) -1);
936c0fe4
AI
2403}
2404
2405extern __inline __mmask16
2406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2407_mm_cmplt_epu8_mask (__m128i __X, __m128i __Y)
2408{
2409 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2410 (__v16qi) __Y, 1,
c42b0bdf 2411 (__mmask16) -1);
936c0fe4
AI
2412}
2413
2414extern __inline __mmask16
2415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2416_mm_cmpge_epu8_mask (__m128i __X, __m128i __Y)
2417{
2418 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2419 (__v16qi) __Y, 5,
c42b0bdf 2420 (__mmask16) -1);
936c0fe4
AI
2421}
2422
2423extern __inline __mmask16
2424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425_mm_cmple_epu8_mask (__m128i __X, __m128i __Y)
2426{
2427 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
2428 (__v16qi) __Y, 2,
c42b0bdf 2429 (__mmask16) -1);
936c0fe4
AI
2430}
2431
2432extern __inline __mmask8
2433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2434_mm_cmpneq_epu16_mask (__m128i __X, __m128i __Y)
2435{
2436 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2437 (__v8hi) __Y, 4,
c42b0bdf 2438 (__mmask8) -1);
936c0fe4
AI
2439}
2440
2441extern __inline __mmask8
2442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2443_mm_cmplt_epu16_mask (__m128i __X, __m128i __Y)
2444{
2445 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2446 (__v8hi) __Y, 1,
c42b0bdf 2447 (__mmask8) -1);
936c0fe4
AI
2448}
2449
2450extern __inline __mmask8
2451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452_mm_cmpge_epu16_mask (__m128i __X, __m128i __Y)
2453{
2454 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2455 (__v8hi) __Y, 5,
c42b0bdf 2456 (__mmask8) -1);
936c0fe4
AI
2457}
2458
2459extern __inline __mmask8
2460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461_mm_cmple_epu16_mask (__m128i __X, __m128i __Y)
2462{
2463 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
2464 (__v8hi) __Y, 2,
c42b0bdf 2465 (__mmask8) -1);
936c0fe4
AI
2466}
2467
2468extern __inline __mmask16
2469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470_mm_cmpneq_epi8_mask (__m128i __X, __m128i __Y)
2471{
2472 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2473 (__v16qi) __Y, 4,
c42b0bdf 2474 (__mmask16) -1);
936c0fe4
AI
2475}
2476
2477extern __inline __mmask16
2478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2479_mm_cmplt_epi8_mask (__m128i __X, __m128i __Y)
2480{
2481 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2482 (__v16qi) __Y, 1,
c42b0bdf 2483 (__mmask16) -1);
936c0fe4
AI
2484}
2485
2486extern __inline __mmask16
2487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2488_mm_cmpge_epi8_mask (__m128i __X, __m128i __Y)
2489{
2490 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2491 (__v16qi) __Y, 5,
c42b0bdf 2492 (__mmask16) -1);
936c0fe4
AI
2493}
2494
2495extern __inline __mmask16
2496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2497_mm_cmple_epi8_mask (__m128i __X, __m128i __Y)
2498{
2499 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
2500 (__v16qi) __Y, 2,
c42b0bdf 2501 (__mmask16) -1);
936c0fe4
AI
2502}
2503
2504extern __inline __mmask8
2505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506_mm_cmpneq_epi16_mask (__m128i __X, __m128i __Y)
2507{
2508 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2509 (__v8hi) __Y, 4,
c42b0bdf 2510 (__mmask8) -1);
936c0fe4
AI
2511}
2512
2513extern __inline __mmask8
2514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2515_mm_cmplt_epi16_mask (__m128i __X, __m128i __Y)
2516{
2517 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2518 (__v8hi) __Y, 1,
c42b0bdf 2519 (__mmask8) -1);
936c0fe4
AI
2520}
2521
2522extern __inline __mmask8
2523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2524_mm_cmpge_epi16_mask (__m128i __X, __m128i __Y)
2525{
2526 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2527 (__v8hi) __Y, 5,
c42b0bdf 2528 (__mmask8) -1);
936c0fe4
AI
2529}
2530
2531extern __inline __mmask8
2532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533_mm_cmple_epi16_mask (__m128i __X, __m128i __Y)
2534{
2535 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
2536 (__v8hi) __Y, 2,
c42b0bdf 2537 (__mmask8) -1);
936c0fe4
AI
2538}
2539
2540extern __inline __m256i
2541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542_mm256_mask_mulhrs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
2543 __m256i __Y)
2544{
2545 return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
2546 (__v16hi) __Y,
2547 (__v16hi) __W,
2548 (__mmask16) __U);
2549}
2550
2551extern __inline __m256i
2552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2553_mm256_maskz_mulhrs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
2554{
2555 return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
2556 (__v16hi) __Y,
2557 (__v16hi)
fd79b414 2558 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2559 (__mmask16) __U);
2560}
2561
2562extern __inline __m256i
2563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2564_mm256_mask_mulhi_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2565 __m256i __B)
2566{
2567 return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
2568 (__v16hi) __B,
2569 (__v16hi) __W,
2570 (__mmask16) __U);
2571}
2572
2573extern __inline __m256i
2574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2575_mm256_maskz_mulhi_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2576{
2577 return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
2578 (__v16hi) __B,
2579 (__v16hi)
fd79b414 2580 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2581 (__mmask16) __U);
2582}
2583
2584extern __inline __m256i
2585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2586_mm256_mask_mulhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2587 __m256i __B)
2588{
2589 return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
2590 (__v16hi) __B,
2591 (__v16hi) __W,
2592 (__mmask16) __U);
2593}
2594
2595extern __inline __m256i
2596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2597_mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2598{
2599 return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
2600 (__v16hi) __B,
2601 (__v16hi)
fd79b414 2602 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2603 (__mmask16) __U);
2604}
2605
2606extern __inline __m128i
2607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608_mm_mask_mulhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2609 __m128i __B)
2610{
2611 return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
2612 (__v8hi) __B,
2613 (__v8hi) __W,
2614 (__mmask8) __U);
2615}
2616
2617extern __inline __m128i
2618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2619_mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2620{
2621 return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
2622 (__v8hi) __B,
2623 (__v8hi)
fd79b414 2624 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2625 (__mmask8) __U);
2626}
2627
2628extern __inline __m128i
2629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2630_mm_mask_mulhi_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
2631 __m128i __B)
2632{
2633 return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
2634 (__v8hi) __B,
2635 (__v8hi) __W,
2636 (__mmask8) __U);
2637}
2638
2639extern __inline __m128i
2640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2641_mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
2642{
2643 return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
2644 (__v8hi) __B,
2645 (__v8hi)
fd79b414 2646 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2647 (__mmask8) __U);
2648}
2649
2650extern __inline __m128i
2651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2652_mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
2653 __m128i __Y)
2654{
2655 return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
2656 (__v8hi) __Y,
2657 (__v8hi) __W,
2658 (__mmask8) __U);
2659}
2660
2661extern __inline __m128i
2662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2663_mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
2664{
2665 return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
2666 (__v8hi) __Y,
2667 (__v8hi)
fd79b414 2668 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2669 (__mmask8) __U);
2670}
2671
2672extern __inline __m256i
2673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2674_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2675 __m256i __B)
2676{
2677 return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
2678 (__v16hi) __B,
2679 (__v16hi) __W,
2680 (__mmask16) __U);
2681}
2682
2683extern __inline __m256i
2684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2685_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2686{
2687 return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
2688 (__v16hi) __B,
2689 (__v16hi)
fd79b414 2690 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2691 (__mmask16) __U);
2692}
2693
2694extern __inline __m128i
2695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2696_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
2697 __m128i __B)
2698{
2699 return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
2700 (__v8hi) __B,
2701 (__v8hi) __W,
2702 (__mmask8) __U);
2703}
2704
2705extern __inline __m128i
2706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
2708{
2709 return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
2710 (__v8hi) __B,
2711 (__v8hi)
fd79b414 2712 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2713 (__mmask8) __U);
2714}
2715
2716extern __inline __m256i
2717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 2718_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
936c0fe4
AI
2719{
2720 return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
2721 (__v16hi) __W,
2722 (__mmask16) __U);
2723}
2724
2725extern __inline __m256i
2726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2727_mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
2728{
2729 return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
2730 (__v16hi)
fd79b414 2731 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2732 (__mmask16) __U);
2733}
2734
2735extern __inline __m128i
2736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 2737_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
936c0fe4
AI
2738{
2739 return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
2740 (__v8hi) __W,
2741 (__mmask8) __U);
2742}
2743
2744extern __inline __m128i
2745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2746_mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
2747{
2748 return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
2749 (__v8hi)
fd79b414 2750 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2751 (__mmask8) __U);
2752}
2753
2754extern __inline __m256i
2755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 2756_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
936c0fe4
AI
2757{
2758 return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
2759 (__v16hi) __W,
2760 (__mmask16) __U);
2761}
2762
2763extern __inline __m256i
2764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2765_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
2766{
2767 return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
2768 (__v16hi)
fd79b414 2769 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2770 (__mmask16) __U);
2771}
2772
2773extern __inline __m128i
2774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 2775_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
936c0fe4
AI
2776{
2777 return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
2778 (__v8hi) __W,
2779 (__mmask8) __U);
2780}
2781
2782extern __inline __m128i
2783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2784_mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A)
2785{
2786 return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
2787 (__v8hi)
fd79b414 2788 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2789 (__mmask8) __U);
2790}
2791
2792extern __inline __m256i
2793__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2794_mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2795 __m256i __B)
2796{
2797 return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
2798 (__v32qi) __B,
2799 (__v32qi) __W,
2800 (__mmask32) __U);
2801}
2802
2803extern __inline __m256i
2804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2805_mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2806{
2807 return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
2808 (__v32qi) __B,
2809 (__v32qi)
fd79b414 2810 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2811 (__mmask32) __U);
2812}
2813
2814extern __inline __m128i
2815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816_mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
2817 __m128i __B)
2818{
2819 return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
2820 (__v16qi) __B,
2821 (__v16qi) __W,
2822 (__mmask16) __U);
2823}
2824
2825extern __inline __m128i
2826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2827_mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
2828{
2829 return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
2830 (__v16qi) __B,
2831 (__v16qi)
fd79b414 2832 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2833 (__mmask16) __U);
2834}
2835
2836extern __inline __m256i
2837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2838_mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2839 __m256i __B)
2840{
2841 return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
2842 (__v16hi) __B,
2843 (__v16hi) __W,
2844 (__mmask16) __U);
2845}
2846
2847extern __inline __m256i
2848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2849_mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
2850{
2851 return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
2852 (__v16hi) __B,
2853 (__v16hi)
fd79b414 2854 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2855 (__mmask16) __U);
2856}
2857
2858extern __inline __m128i
2859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2860_mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
2861 __m128i __B)
2862{
2863 return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
2864 (__v8hi) __B,
2865 (__v8hi) __W,
2866 (__mmask8) __U);
2867}
2868
2869extern __inline __m128i
2870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2871_mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
2872{
2873 return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
2874 (__v8hi) __B,
2875 (__v8hi)
fd79b414 2876 _mm_avx512_setzero_si128 (),
936c0fe4
AI
2877 (__mmask8) __U);
2878}
2879
2880extern __inline __m256i
2881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2882_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2883 __m256i __B)
2884{
2885 return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
2886 (__v32qi) __B,
2887 (__v32qi) __W,
2888 (__mmask32) __U);
2889}
2890
2891extern __inline __m256i
2892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2893_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2894{
2895 return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
2896 (__v32qi) __B,
2897 (__v32qi)
fd79b414 2898 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2899 (__mmask32) __U);
2900}
2901
2902extern __inline __m256i
2903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2904_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2905 __m256i __B)
2906{
2907 return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
2908 (__v16hi) __B,
2909 (__v16hi) __W,
2910 (__mmask16) __U);
2911}
2912
2913extern __inline __m256i
2914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2915_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2916{
2917 return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
2918 (__v16hi) __B,
2919 (__v16hi)
fd79b414 2920 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2921 (__mmask16) __U);
2922}
2923
2924extern __inline __m256i
2925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2926_mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
2927 __m256i __B)
2928{
2929 return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
2930 (__v32qi) __B,
2931 (__v32qi) __W,
2932 (__mmask32) __U);
2933}
2934
2935extern __inline __m256i
2936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2937_mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
2938{
2939 return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
2940 (__v32qi) __B,
2941 (__v32qi)
fd79b414 2942 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2943 (__mmask32) __U);
2944}
2945
2946extern __inline __m256i
2947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2948_mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
2949 __m256i __B)
2950{
2951 return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
2952 (__v16hi) __B,
2953 (__v16hi) __W,
2954 (__mmask16) __U);
2955}
2956
2957extern __inline __m256i
2958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2959_mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
2960{
2961 return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
2962 (__v16hi) __B,
2963 (__v16hi)
fd79b414 2964 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2965 (__mmask16) __U);
2966}
2967
2968extern __inline __m256i
2969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2970_mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
2971 __m256i __B)
2972{
2973 return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
2974 (__v32qi) __B,
2975 (__v32qi) __W,
2976 (__mmask32) __U);
2977}
2978
2979extern __inline __m256i
2980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2981_mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
2982{
2983 return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
2984 (__v32qi) __B,
2985 (__v32qi)
fd79b414 2986 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
2987 (__mmask32) __U);
2988}
2989
2990extern __inline __m256i
2991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992_mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
2993 __m256i __B)
2994{
2995 return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
2996 (__v16hi) __B,
2997 (__v16hi) __W,
2998 (__mmask16) __U);
2999}
3000
3001extern __inline __m256i
3002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003_mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
3004{
3005 return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
3006 (__v16hi) __B,
3007 (__v16hi)
fd79b414 3008 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3009 (__mmask16) __U);
3010}
3011
3012extern __inline __m256i
3013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3014_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3015 __m256i __B)
3016{
3017 return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
3018 (__v32qi) __B,
3019 (__v32qi) __W,
3020 (__mmask32) __U);
3021}
3022
3023extern __inline __m256i
3024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3025_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3026{
3027 return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
3028 (__v32qi) __B,
3029 (__v32qi)
fd79b414 3030 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3031 (__mmask32) __U);
3032}
3033
3034extern __inline __m256i
3035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3036_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3037 __m256i __B)
3038{
3039 return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
3040 (__v16hi) __B,
3041 (__v16hi) __W,
3042 (__mmask16) __U);
3043}
3044
3045extern __inline __m256i
3046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3047_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3048{
3049 return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
3050 (__v16hi) __B,
3051 (__v16hi)
fd79b414 3052 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3053 (__mmask16) __U);
3054}
3055
3056extern __inline __m256i
3057__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3058_mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3059 __m256i __B)
3060{
3061 return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
3062 (__v32qi) __B,
3063 (__v32qi) __W,
3064 (__mmask32) __U);
3065}
3066
3067extern __inline __m256i
3068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3069_mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3070{
3071 return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
3072 (__v32qi) __B,
3073 (__v32qi)
fd79b414 3074 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3075 (__mmask32) __U);
3076}
3077
3078extern __inline __m256i
3079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3080_mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3081 __m256i __B)
3082{
3083 return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
3084 (__v16hi) __B,
3085 (__v16hi) __W,
3086 (__mmask16) __U);
3087}
3088
3089extern __inline __m256i
3090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3091_mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3092{
3093 return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
3094 (__v16hi) __B,
3095 (__v16hi)
fd79b414 3096 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3097 (__mmask16) __U);
3098}
3099
3100extern __inline __m256i
3101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3102_mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
3103 __m256i __B)
3104{
3105 return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
3106 (__v32qi) __B,
3107 (__v32qi) __W,
3108 (__mmask32) __U);
3109}
3110
3111extern __inline __m256i
3112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3113_mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
3114{
3115 return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
3116 (__v32qi) __B,
3117 (__v32qi)
fd79b414 3118 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3119 (__mmask32) __U);
3120}
3121
3122extern __inline __m256i
3123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3124_mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
3125 __m256i __B)
3126{
3127 return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
3128 (__v16hi) __B,
3129 (__v16hi) __W,
3130 (__mmask16) __U);
3131}
3132
3133extern __inline __m256i
3134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3135_mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
3136{
3137 return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
3138 (__v16hi) __B,
3139 (__v16hi)
fd79b414 3140 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3141 (__mmask16) __U);
3142}
3143
3144extern __inline __m128i
3145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3146_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3147 __m128i __B)
3148{
3149 return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
3150 (__v16qi) __B,
3151 (__v16qi) __W,
3152 (__mmask16) __U);
3153}
3154
3155extern __inline __m128i
3156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3157_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3158{
3159 return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
3160 (__v16qi) __B,
3161 (__v16qi)
fd79b414 3162 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3163 (__mmask16) __U);
3164}
3165
3166extern __inline __m128i
3167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3168_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3169 __m128i __B)
3170{
3171 return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
3172 (__v8hi) __B,
3173 (__v8hi) __W,
3174 (__mmask8) __U);
3175}
3176
3177extern __inline __m128i
3178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3179_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3180{
3181 return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
3182 (__v8hi) __B,
3183 (__v8hi)
fd79b414 3184 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3185 (__mmask8) __U);
3186}
3187
3188extern __inline __m256i
3189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3190_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3191 __m256i __B)
3192{
3193 return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
3194 (__v32qi) __B,
3195 (__v32qi) __W,
3196 (__mmask32) __U);
3197}
3198
3199extern __inline __m256i
3200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3201_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3202{
3203 return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
3204 (__v32qi) __B,
3205 (__v32qi)
fd79b414 3206 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3207 (__mmask32) __U);
3208}
3209
3210extern __inline __m128i
3211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3212_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3213 __m128i __B)
3214{
3215 return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
3216 (__v16qi) __B,
3217 (__v16qi) __W,
3218 (__mmask16) __U);
3219}
3220
3221extern __inline __m128i
3222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3223_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3224{
3225 return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
3226 (__v16qi) __B,
3227 (__v16qi)
fd79b414 3228 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3229 (__mmask16) __U);
3230}
3231
3232extern __inline __m256i
3233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3234_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3235 __m256i __B)
3236{
3237 return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
3238 (__v16hi) __B,
3239 (__v16hi) __W,
3240 (__mmask16) __U);
3241}
3242
3243extern __inline __m256i
3244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3246{
3247 return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
3248 (__v16hi) __B,
3249 (__v16hi)
fd79b414 3250 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3251 (__mmask16) __U);
3252}
3253
3254extern __inline __m128i
3255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3256_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3257 __m128i __B)
3258{
3259 return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
3260 (__v8hi) __B,
3261 (__v8hi) __W,
3262 (__mmask8) __U);
3263}
3264
3265extern __inline __m128i
3266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3267_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3268{
3269 return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
3270 (__v8hi) __B,
3271 (__v8hi)
fd79b414 3272 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3273 (__mmask8) __U);
3274}
3275
3276extern __inline __m256i
3277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3278_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3279 __m256i __B)
3280{
3281 return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
3282 (__v32qi) __B,
3283 (__v32qi) __W,
3284 (__mmask32) __U);
3285}
3286
3287extern __inline __m256i
3288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3289_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3290{
3291 return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
3292 (__v32qi) __B,
3293 (__v32qi)
fd79b414 3294 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3295 (__mmask32) __U);
3296}
3297
3298extern __inline __m128i
3299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3300_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3301 __m128i __B)
3302{
3303 return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
3304 (__v16qi) __B,
3305 (__v16qi) __W,
3306 (__mmask16) __U);
3307}
3308
3309extern __inline __m128i
3310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3311_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3312{
3313 return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
3314 (__v16qi) __B,
3315 (__v16qi)
fd79b414 3316 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3317 (__mmask16) __U);
3318}
3319
3320extern __inline __m256i
3321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3322_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
3323 __m256i __B)
3324{
3325 return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
3326 (__v16hi) __B,
3327 (__v16hi) __W,
3328 (__mmask16) __U);
3329}
3330
3331extern __inline __m256i
3332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3333_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
3334{
3335 return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
3336 (__v16hi) __B,
3337 (__v16hi)
fd79b414 3338 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3339 (__mmask16) __U);
3340}
3341
3342extern __inline __m128i
3343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3344_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
3345 __m128i __B)
3346{
3347 return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
3348 (__v8hi) __B,
3349 (__v8hi) __W,
3350 (__mmask8) __U);
3351}
3352
3353extern __inline __m128i
3354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3355_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
3356{
3357 return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
3358 (__v8hi) __B,
3359 (__v8hi)
fd79b414 3360 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3361 (__mmask8) __U);
3362}
3363
3364extern __inline __mmask16
3365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3366_mm_cmpeq_epi8_mask (__m128i __A, __m128i __B)
3367{
3368 return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
3369 (__v16qi) __B,
3370 (__mmask16) -1);
3371}
3372
eee5d6f5
AI
3373extern __inline __mmask16
3374__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3375_mm_cmpeq_epu8_mask (__m128i __A, __m128i __B)
3376{
3377 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3378 (__v16qi) __B, 0,
3379 (__mmask16) -1);
3380}
3381
3382extern __inline __mmask16
3383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3384_mm_mask_cmpeq_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3385{
3386 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3387 (__v16qi) __B, 0,
3388 __U);
3389}
3390
936c0fe4
AI
3391extern __inline __mmask16
3392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3393_mm_mask_cmpeq_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3394{
3395 return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
3396 (__v16qi) __B,
3397 __U);
3398}
3399
eee5d6f5
AI
3400extern __inline __mmask32
3401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3402_mm256_cmpeq_epu8_mask (__m256i __A, __m256i __B)
3403{
3404 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3405 (__v32qi) __B, 0,
3406 (__mmask32) -1);
3407}
3408
936c0fe4
AI
3409extern __inline __mmask32
3410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3411_mm256_cmpeq_epi8_mask (__m256i __A, __m256i __B)
3412{
3413 return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
3414 (__v32qi) __B,
3415 (__mmask32) -1);
3416}
3417
eee5d6f5
AI
3418extern __inline __mmask32
3419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3420_mm256_mask_cmpeq_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3421{
3422 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3423 (__v32qi) __B, 0,
3424 __U);
3425}
3426
936c0fe4
AI
3427extern __inline __mmask32
3428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3429_mm256_mask_cmpeq_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3430{
3431 return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
3432 (__v32qi) __B,
3433 __U);
3434}
3435
eee5d6f5
AI
3436extern __inline __mmask8
3437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438_mm_cmpeq_epu16_mask (__m128i __A, __m128i __B)
3439{
3440 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3441 (__v8hi) __B, 0,
3442 (__mmask8) -1);
3443}
3444
936c0fe4
AI
3445extern __inline __mmask8
3446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447_mm_cmpeq_epi16_mask (__m128i __A, __m128i __B)
3448{
3449 return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
3450 (__v8hi) __B,
3451 (__mmask8) -1);
3452}
3453
eee5d6f5
AI
3454extern __inline __mmask8
3455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3456_mm_mask_cmpeq_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3457{
3458 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3459 (__v8hi) __B, 0, __U);
3460}
3461
936c0fe4
AI
3462extern __inline __mmask8
3463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3464_mm_mask_cmpeq_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3465{
3466 return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
3467 (__v8hi) __B, __U);
3468}
3469
eee5d6f5
AI
3470extern __inline __mmask16
3471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3472_mm256_cmpeq_epu16_mask (__m256i __A, __m256i __B)
3473{
3474 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3475 (__v16hi) __B, 0,
3476 (__mmask16) -1);
3477}
3478
936c0fe4
AI
3479extern __inline __mmask16
3480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3481_mm256_cmpeq_epi16_mask (__m256i __A, __m256i __B)
3482{
3483 return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
3484 (__v16hi) __B,
3485 (__mmask16) -1);
3486}
3487
eee5d6f5
AI
3488extern __inline __mmask16
3489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490_mm256_mask_cmpeq_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3491{
3492 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3493 (__v16hi) __B, 0,
3494 __U);
3495}
3496
936c0fe4
AI
3497extern __inline __mmask16
3498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3499_mm256_mask_cmpeq_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3500{
3501 return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
3502 (__v16hi) __B,
3503 __U);
3504}
3505
eee5d6f5
AI
3506extern __inline __mmask16
3507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3508_mm_cmpgt_epu8_mask (__m128i __A, __m128i __B)
3509{
3510 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3511 (__v16qi) __B, 6,
3512 (__mmask16) -1);
3513}
3514
936c0fe4
AI
3515extern __inline __mmask16
3516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3517_mm_cmpgt_epi8_mask (__m128i __A, __m128i __B)
3518{
3519 return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
3520 (__v16qi) __B,
3521 (__mmask16) -1);
3522}
3523
eee5d6f5
AI
3524extern __inline __mmask16
3525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3526_mm_mask_cmpgt_epu8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3527{
3528 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __A,
3529 (__v16qi) __B, 6,
3530 __U);
3531}
3532
936c0fe4
AI
3533extern __inline __mmask16
3534__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3535_mm_mask_cmpgt_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3536{
3537 return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
3538 (__v16qi) __B,
3539 __U);
3540}
3541
eee5d6f5
AI
3542extern __inline __mmask32
3543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3544_mm256_cmpgt_epu8_mask (__m256i __A, __m256i __B)
3545{
3546 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3547 (__v32qi) __B, 6,
3548 (__mmask32) -1);
3549}
3550
936c0fe4
AI
3551extern __inline __mmask32
3552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3553_mm256_cmpgt_epi8_mask (__m256i __A, __m256i __B)
3554{
3555 return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
3556 (__v32qi) __B,
3557 (__mmask32) -1);
3558}
3559
eee5d6f5
AI
3560extern __inline __mmask32
3561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3562_mm256_mask_cmpgt_epu8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3563{
3564 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __A,
3565 (__v32qi) __B, 6,
3566 __U);
3567}
3568
936c0fe4
AI
3569extern __inline __mmask32
3570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3571_mm256_mask_cmpgt_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3572{
3573 return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
3574 (__v32qi) __B,
3575 __U);
3576}
3577
eee5d6f5
AI
3578extern __inline __mmask8
3579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3580_mm_cmpgt_epu16_mask (__m128i __A, __m128i __B)
3581{
3582 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3583 (__v8hi) __B, 6,
3584 (__mmask8) -1);
3585}
3586
936c0fe4
AI
3587extern __inline __mmask8
3588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589_mm_cmpgt_epi16_mask (__m128i __A, __m128i __B)
3590{
3591 return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
3592 (__v8hi) __B,
3593 (__mmask8) -1);
3594}
3595
eee5d6f5
AI
3596extern __inline __mmask8
3597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3598_mm_mask_cmpgt_epu16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3599{
3600 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __A,
3601 (__v8hi) __B, 6, __U);
3602}
3603
936c0fe4
AI
3604extern __inline __mmask8
3605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3606_mm_mask_cmpgt_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3607{
3608 return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
3609 (__v8hi) __B, __U);
3610}
3611
eee5d6f5
AI
3612extern __inline __mmask16
3613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614_mm256_cmpgt_epu16_mask (__m256i __A, __m256i __B)
3615{
3616 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3617 (__v16hi) __B, 6,
3618 (__mmask16) -1);
3619}
3620
936c0fe4
AI
3621extern __inline __mmask16
3622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3623_mm256_cmpgt_epi16_mask (__m256i __A, __m256i __B)
3624{
3625 return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
3626 (__v16hi) __B,
3627 (__mmask16) -1);
3628}
3629
eee5d6f5
AI
3630extern __inline __mmask16
3631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632_mm256_mask_cmpgt_epu16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3633{
3634 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __A,
3635 (__v16hi) __B, 6,
3636 __U);
3637}
3638
936c0fe4
AI
3639extern __inline __mmask16
3640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3641_mm256_mask_cmpgt_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3642{
3643 return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
3644 (__v16hi) __B,
3645 __U);
3646}
3647
3648extern __inline __mmask16
3649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3650_mm_testn_epi8_mask (__m128i __A, __m128i __B)
3651{
3652 return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
3653 (__v16qi) __B,
3654 (__mmask16) -1);
3655}
3656
3657extern __inline __mmask16
3658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3659_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
3660{
3661 return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
3662 (__v16qi) __B, __U);
3663}
3664
3665extern __inline __mmask32
3666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3667_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
3668{
3669 return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
3670 (__v32qi) __B,
3671 (__mmask32) -1);
3672}
3673
3674extern __inline __mmask32
3675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3676_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
3677{
3678 return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
3679 (__v32qi) __B, __U);
3680}
3681
3682extern __inline __mmask8
3683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3684_mm_testn_epi16_mask (__m128i __A, __m128i __B)
3685{
3686 return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
3687 (__v8hi) __B,
3688 (__mmask8) -1);
3689}
3690
3691extern __inline __mmask8
3692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3693_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
3694{
3695 return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
3696 (__v8hi) __B, __U);
3697}
3698
3699extern __inline __mmask16
3700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3701_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
3702{
3703 return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
3704 (__v16hi) __B,
3705 (__mmask16) -1);
3706}
3707
3708extern __inline __mmask16
3709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
3711{
3712 return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
3713 (__v16hi) __B, __U);
3714}
3715
3716extern __inline __m256i
3717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3718_mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
3719 __m256i __B)
3720{
3721 return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
3722 (__v32qi) __B,
3723 (__v32qi) __W,
3724 (__mmask32) __U);
3725}
3726
3727extern __inline __m256i
3728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729_mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
3730{
3731 return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
3732 (__v32qi) __B,
3733 (__v32qi)
fd79b414 3734 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3735 (__mmask32) __U);
3736}
3737
3738extern __inline __m128i
3739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3740_mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
3741 __m128i __B)
3742{
3743 return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
3744 (__v16qi) __B,
3745 (__v16qi) __W,
3746 (__mmask16) __U);
3747}
3748
3749extern __inline __m128i
3750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3751_mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
3752{
3753 return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
3754 (__v16qi) __B,
3755 (__v16qi)
fd79b414 3756 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3757 (__mmask16) __U);
3758}
3759
3760extern __inline __m256i
3761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3762_mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
3763{
3764 return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
3765 (__v16hi) __B,
3766 (__v32qi)
fd79b414 3767 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3768 __M);
3769}
3770
3771extern __inline __m256i
3772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3773_mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
3774 __m256i __B)
3775{
3776 return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
3777 (__v16hi) __B,
3778 (__v32qi) __W,
3779 __M);
3780}
3781
3782extern __inline __m128i
3783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3784_mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
3785{
3786 return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
3787 (__v8hi) __B,
3788 (__v16qi)
fd79b414 3789 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3790 __M);
3791}
3792
3793extern __inline __m128i
3794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3795_mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
3796 __m128i __B)
3797{
3798 return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
3799 (__v8hi) __B,
3800 (__v16qi) __W,
3801 __M);
3802}
3803
3804extern __inline __m256i
3805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3806_mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
3807{
3808 return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
3809 (__v16hi) __B,
3810 (__v32qi)
fd79b414 3811 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3812 __M);
3813}
3814
3815extern __inline __m256i
3816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3817_mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
3818 __m256i __B)
3819{
3820 return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
3821 (__v16hi) __B,
3822 (__v32qi) __W,
3823 __M);
3824}
3825
3826extern __inline __m128i
3827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3828_mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
3829{
3830 return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
3831 (__v8hi) __B,
3832 (__v16qi)
fd79b414 3833 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3834 __M);
3835}
3836
3837extern __inline __m128i
3838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3839_mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
3840 __m128i __B)
3841{
3842 return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
3843 (__v8hi) __B,
3844 (__v16qi) __W,
3845 __M);
3846}
3847
3848extern __inline __m256i
3849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3850_mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
3851{
3852 return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
3853 (__v32qi) __W,
3854 (__mmask32) __U);
3855}
3856
3857extern __inline __m256i
3858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
3860{
3861 return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
3862 (__v32qi)
fd79b414 3863 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3864 (__mmask32) __U);
3865}
3866
3867extern __inline __m128i
3868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3869_mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
3870{
3871 return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
3872 (__v16qi) __W,
3873 (__mmask16) __U);
3874}
3875
3876extern __inline __m128i
3877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3878_mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A)
3879{
3880 return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
3881 (__v16qi)
fd79b414 3882 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3883 (__mmask16) __U);
3884}
3885
3886extern __inline __m256i
3887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3888_mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
3889{
3890 return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
3891 (__v16hi) __W,
3892 (__mmask16) __U);
3893}
3894
3895extern __inline __m256i
3896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3897_mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A)
3898{
3899 return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
3900 (__v16hi)
fd79b414 3901 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
3902 (__mmask16) __U);
3903}
3904
3905extern __inline __m128i
3906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3907_mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
3908{
3909 return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
3910 (__v8hi) __W,
3911 (__mmask8) __U);
3912}
3913
3914extern __inline __m128i
3915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3916_mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A)
3917{
3918 return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
3919 (__v8hi)
fd79b414 3920 _mm_avx512_setzero_si128 (),
936c0fe4
AI
3921 (__mmask8) __U);
3922}
3923
3924extern __inline __mmask32
3925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3926_mm256_cmpneq_epu8_mask (__m256i __X, __m256i __Y)
3927{
3928 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3929 (__v32qi) __Y, 4,
c42b0bdf 3930 (__mmask32) -1);
936c0fe4
AI
3931}
3932
3933extern __inline __mmask32
3934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3935_mm256_cmplt_epu8_mask (__m256i __X, __m256i __Y)
3936{
3937 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3938 (__v32qi) __Y, 1,
c42b0bdf 3939 (__mmask32) -1);
936c0fe4
AI
3940}
3941
3942extern __inline __mmask32
3943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3944_mm256_cmpge_epu8_mask (__m256i __X, __m256i __Y)
3945{
3946 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3947 (__v32qi) __Y, 5,
c42b0bdf 3948 (__mmask32) -1);
936c0fe4
AI
3949}
3950
3951extern __inline __mmask32
3952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3953_mm256_cmple_epu8_mask (__m256i __X, __m256i __Y)
3954{
3955 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
3956 (__v32qi) __Y, 2,
c42b0bdf 3957 (__mmask32) -1);
936c0fe4
AI
3958}
3959
3960extern __inline __mmask16
3961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962_mm256_cmpneq_epu16_mask (__m256i __X, __m256i __Y)
3963{
3964 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3965 (__v16hi) __Y, 4,
c42b0bdf 3966 (__mmask16) -1);
936c0fe4
AI
3967}
3968
3969extern __inline __mmask16
3970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3971_mm256_cmplt_epu16_mask (__m256i __X, __m256i __Y)
3972{
3973 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3974 (__v16hi) __Y, 1,
c42b0bdf 3975 (__mmask16) -1);
936c0fe4
AI
3976}
3977
3978extern __inline __mmask16
3979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3980_mm256_cmpge_epu16_mask (__m256i __X, __m256i __Y)
3981{
3982 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3983 (__v16hi) __Y, 5,
c42b0bdf 3984 (__mmask16) -1);
936c0fe4
AI
3985}
3986
3987extern __inline __mmask16
3988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3989_mm256_cmple_epu16_mask (__m256i __X, __m256i __Y)
3990{
3991 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
3992 (__v16hi) __Y, 2,
c42b0bdf 3993 (__mmask16) -1);
936c0fe4
AI
3994}
3995
93103603
SP
3996extern __inline void
3997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3998_mm256_storeu_epi16 (void *__P, __m256i __A)
3999{
4000 *(__v16hi_u *) __P = (__v16hi_u) __A;
4001}
4002
936c0fe4
AI
4003extern __inline void
4004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4005_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
4006{
fc9cf6da 4007 __builtin_ia32_storedquhi256_mask ((short *) __P,
936c0fe4
AI
4008 (__v16hi) __A,
4009 (__mmask16) __U);
4010}
4011
4012extern __inline void
4013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
93103603
SP
4014_mm_storeu_epi16 (void *__P, __m128i __A)
4015{
4016 *(__v8hi_u *) __P = (__v8hi_u) __A;
4017}
4018
4019extern __inline void
4020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
936c0fe4
AI
4021_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
4022{
fc9cf6da 4023 __builtin_ia32_storedquhi128_mask ((short *) __P,
936c0fe4
AI
4024 (__v8hi) __A,
4025 (__mmask8) __U);
4026}
4027
4028extern __inline __m128i
4029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4030_mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4031 __m128i __B)
4032{
4033 return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
4034 (__v8hi) __B,
4035 (__v8hi) __W,
4036 (__mmask8) __U);
4037}
4038
4039extern __inline __m128i
4040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4041_mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
4042 __m128i __B)
4043{
4044 return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
4045 (__v16qi) __B,
4046 (__v16qi) __W,
4047 (__mmask16) __U);
4048}
4049
4050extern __inline __m128i
4051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4052_mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
4053{
4054 return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
4055 (__v16qi) __B,
4056 (__v16qi)
fd79b414 4057 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4058 (__mmask16) __U);
4059}
4060
4061extern __inline __m128i
4062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4063_mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4064 __m128i __B)
4065{
4066 return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
4067 (__v8hi) __B,
4068 (__v8hi) __W,
4069 (__mmask8) __U);
4070}
4071
4072extern __inline __m128i
4073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4074_mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4075{
4076 return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
4077 (__v8hi) __B,
4078 (__v8hi)
fd79b414 4079 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4080 (__mmask8) __U);
4081}
4082
4083extern __inline __m128i
4084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4085_mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
4086 __m128i __B)
4087{
4088 return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
4089 (__v16qi) __B,
4090 (__v16qi) __W,
4091 (__mmask16) __U);
4092}
4093
4094extern __inline __m128i
4095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4096_mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
4097{
4098 return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
4099 (__v16qi) __B,
4100 (__v16qi)
fd79b414 4101 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4102 (__mmask16) __U);
4103}
4104
4105extern __inline __m128i
4106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4107_mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
4108 __m128i __B)
4109{
4110 return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
4111 (__v8hi) __B,
4112 (__v8hi) __W,
4113 (__mmask8) __U);
4114}
4115
4116extern __inline __m128i
4117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4118_mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
4119{
4120 return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
4121 (__v8hi) __B,
4122 (__v8hi)
fd79b414 4123 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4124 (__mmask8) __U);
4125}
4126
4127extern __inline __m256i
4128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4129_mm256_mask_srl_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4130 __m128i __B)
4131{
4132 return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
4133 (__v8hi) __B,
4134 (__v16hi) __W,
4135 (__mmask16) __U);
4136}
4137
4138extern __inline __m256i
4139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4140_mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
4141{
4142 return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
4143 (__v8hi) __B,
4144 (__v16hi)
fd79b414 4145 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4146 (__mmask16) __U);
4147}
4148
4149extern __inline __m128i
4150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4151_mm_mask_srl_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4152 __m128i __B)
4153{
4154 return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
4155 (__v8hi) __B,
4156 (__v8hi) __W,
4157 (__mmask8) __U);
4158}
4159
4160extern __inline __m128i
4161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4162_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4163{
4164 return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
4165 (__v8hi) __B,
4166 (__v8hi)
fd79b414 4167 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4168 (__mmask8) __U);
4169}
4170
4171extern __inline __m256i
4172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4173_mm256_mask_sra_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4174 __m128i __B)
4175{
4176 return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
4177 (__v8hi) __B,
4178 (__v16hi) __W,
4179 (__mmask16) __U);
4180}
4181
4182extern __inline __m256i
4183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4184_mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
4185{
4186 return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
4187 (__v8hi) __B,
4188 (__v16hi)
fd79b414 4189 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4190 (__mmask16) __U);
4191}
4192
4193extern __inline __m128i
4194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4195_mm_mask_sra_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4196 __m128i __B)
4197{
4198 return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
4199 (__v8hi) __B,
4200 (__v8hi) __W,
4201 (__mmask8) __U);
4202}
4203
4204extern __inline __m128i
4205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4206_mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4207{
4208 return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
4209 (__v8hi) __B,
4210 (__v8hi)
fd79b414 4211 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4212 (__mmask8) __U);
4213}
4214
4215extern __inline __m128i
4216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4217_mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4218{
4219 return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
4220 (__v8hi) __B,
4221 (__v8hi)
fd79b414 4222 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4223 (__mmask8) __U);
4224}
4225
4226extern __inline __m128i
4227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4228_mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
4229 __m128i __B)
4230{
4231 return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
4232 (__v16qi) __B,
4233 (__v16qi) __W,
4234 (__mmask16) __U);
4235}
4236
4237extern __inline __m128i
4238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4239_mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
4240{
4241 return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
4242 (__v16qi) __B,
4243 (__v16qi)
fd79b414 4244 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4245 (__mmask16) __U);
4246}
4247
4248extern __inline __m128i
4249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4250_mm_mask_adds_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
4251 __m128i __B)
4252{
4253 return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
4254 (__v8hi) __B,
4255 (__v8hi) __W,
4256 (__mmask8) __U);
4257}
4258
4259extern __inline __m128i
4260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4261_mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
4262{
4263 return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
4264 (__v8hi) __B,
4265 (__v8hi)
fd79b414 4266 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4267 (__mmask8) __U);
4268}
4269
4270extern __inline __m128i
4271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4272_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
4273 __m128i __B)
4274{
4275 return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
4276 (__v16qi) __B,
4277 (__v16qi) __W,
4278 (__mmask16) __U);
4279}
4280
4281extern __inline __m128i
4282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4283_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
4284{
4285 return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
4286 (__v16qi) __B,
4287 (__v16qi)
fd79b414 4288 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4289 (__mmask16) __U);
4290}
4291
4292extern __inline __m128i
4293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4294_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4295 __m128i __B)
4296{
4297 return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
4298 (__v8hi) __B,
4299 (__v8hi) __W,
4300 (__mmask8) __U);
4301}
4302
4303extern __inline __m128i
4304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4305_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4306{
4307 return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
4308 (__v8hi) __B,
4309 (__v8hi)
fd79b414 4310 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4311 (__mmask8) __U);
4312}
4313
4314extern __inline __m128i
4315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316_mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
4317 __m128i __B)
4318{
4319 return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
4320 (__v16qi) __B,
4321 (__v16qi) __W,
4322 (__mmask16) __U);
4323}
4324
4325extern __inline __m128i
4326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4327_mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
4328{
4329 return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
4330 (__v16qi) __B,
4331 (__v16qi)
fd79b414 4332 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4333 (__mmask16) __U);
4334}
4335
4336extern __inline __m128i
4337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338_mm_cvtepi16_epi8 (__m128i __A)
4339{
4340
4341 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4bbabb2a 4342 (__v16qi)_mm_avx512_undefined_si128(),
936c0fe4
AI
4343 (__mmask8) -1);
4344}
4345
c46f9051
JK
4346extern __inline void
4347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4348_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
4349{
4a948703 4350 __builtin_ia32_pmovwb128mem_mask ((unsigned long long *) __P , (__v8hi) __A, __M);
c46f9051
JK
4351}
4352
936c0fe4
AI
4353extern __inline __m128i
4354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4355_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
4356{
4357 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4358 (__v16qi) __O, __M);
4359}
4360
4361extern __inline __m128i
4362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4363_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A)
4364{
4365 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
4366 (__v16qi)
fd79b414 4367 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4368 __M);
4369}
4370
4371extern __inline __m256i
4372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4373_mm256_srav_epi16 (__m256i __A, __m256i __B)
4374{
4375 return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4376 (__v16hi) __B,
4377 (__v16hi)
fd79b414 4378 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4379 (__mmask16) -1);
4380}
4381
4382extern __inline __m256i
4383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4384_mm256_mask_srav_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4385 __m256i __B)
4386{
4387 return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4388 (__v16hi) __B,
4389 (__v16hi) __W,
4390 (__mmask16) __U);
4391}
4392
4393extern __inline __m256i
4394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4395_mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4396{
4397 return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
4398 (__v16hi) __B,
4399 (__v16hi)
fd79b414 4400 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4401 (__mmask16) __U);
4402}
4403
4404extern __inline __m128i
4405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4406_mm_srav_epi16 (__m128i __A, __m128i __B)
4407{
4408 return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4409 (__v8hi) __B,
4410 (__v8hi)
fd79b414 4411 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4412 (__mmask8) -1);
4413}
4414
4415extern __inline __m128i
4416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4417_mm_mask_srav_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4418 __m128i __B)
4419{
4420 return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4421 (__v8hi) __B,
4422 (__v8hi) __W,
4423 (__mmask8) __U);
4424}
4425
4426extern __inline __m128i
4427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4428_mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4429{
4430 return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
4431 (__v8hi) __B,
4432 (__v8hi)
fd79b414 4433 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4434 (__mmask8) __U);
4435}
4436
4437extern __inline __m256i
4438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439_mm256_srlv_epi16 (__m256i __A, __m256i __B)
4440{
4441 return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4442 (__v16hi) __B,
4443 (__v16hi)
fd79b414 4444 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4445 (__mmask16) -1);
4446}
4447
4448extern __inline __m256i
4449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4450_mm256_mask_srlv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4451 __m256i __B)
4452{
4453 return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4454 (__v16hi) __B,
4455 (__v16hi) __W,
4456 (__mmask16) __U);
4457}
4458
4459extern __inline __m256i
4460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4461_mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4462{
4463 return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
4464 (__v16hi) __B,
4465 (__v16hi)
fd79b414 4466 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4467 (__mmask16) __U);
4468}
4469
4470extern __inline __m128i
4471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4472_mm_srlv_epi16 (__m128i __A, __m128i __B)
4473{
4474 return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4475 (__v8hi) __B,
4476 (__v8hi)
fd79b414 4477 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4478 (__mmask8) -1);
4479}
4480
4481extern __inline __m128i
4482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4483_mm_mask_srlv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4484 __m128i __B)
4485{
4486 return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4487 (__v8hi) __B,
4488 (__v8hi) __W,
4489 (__mmask8) __U);
4490}
4491
4492extern __inline __m128i
4493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4494_mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4495{
4496 return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
4497 (__v8hi) __B,
4498 (__v8hi)
fd79b414 4499 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4500 (__mmask8) __U);
4501}
4502
4503extern __inline __m256i
4504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4505_mm256_sllv_epi16 (__m256i __A, __m256i __B)
4506{
4507 return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4508 (__v16hi) __B,
4509 (__v16hi)
fd79b414 4510 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4511 (__mmask16) -1);
4512}
4513
4514extern __inline __m256i
4515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4516_mm256_mask_sllv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4517 __m256i __B)
4518{
4519 return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4520 (__v16hi) __B,
4521 (__v16hi) __W,
4522 (__mmask16) __U);
4523}
4524
4525extern __inline __m256i
4526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4527_mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
4528{
4529 return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
4530 (__v16hi) __B,
4531 (__v16hi)
fd79b414 4532 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4533 (__mmask16) __U);
4534}
4535
4536extern __inline __m128i
4537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4538_mm_sllv_epi16 (__m128i __A, __m128i __B)
4539{
4540 return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4541 (__v8hi) __B,
4542 (__v8hi)
fd79b414 4543 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4544 (__mmask8) -1);
4545}
4546
4547extern __inline __m128i
4548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4549_mm_mask_sllv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4550 __m128i __B)
4551{
4552 return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4553 (__v8hi) __B,
4554 (__v8hi) __W,
4555 (__mmask8) __U);
4556}
4557
4558extern __inline __m128i
4559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4560_mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4561{
4562 return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
4563 (__v8hi) __B,
4564 (__v8hi)
fd79b414 4565 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4566 (__mmask8) __U);
4567}
4568
4569extern __inline __m128i
4570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4571_mm_mask_sll_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
4572 __m128i __B)
4573{
4574 return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
4575 (__v8hi) __B,
4576 (__v8hi) __W,
4577 (__mmask8) __U);
4578}
4579
4580extern __inline __m128i
4581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4582_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
4583{
4584 return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
4585 (__v8hi) __B,
4586 (__v8hi)
fd79b414 4587 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4588 (__mmask8) __U);
4589}
4590
4591extern __inline __m256i
4592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4593_mm256_mask_sll_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
4594 __m128i __B)
4595{
4596 return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
4597 (__v8hi) __B,
4598 (__v16hi) __W,
4599 (__mmask16) __U);
4600}
4601
4602extern __inline __m256i
4603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4604_mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
4605{
4606 return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
4607 (__v8hi) __B,
4608 (__v16hi)
fd79b414 4609 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4610 (__mmask16) __U);
4611}
4612
4613extern __inline __m256i
4614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4615_mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
4616{
4617 return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
4618 (__v8si) __B,
4619 (__v16hi)
fd79b414 4620 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4621 __M);
4622}
4623
4624extern __inline __m256i
4625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4626_mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
4627 __m256i __B)
4628{
4629 return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
4630 (__v8si) __B,
4631 (__v16hi) __W,
4632 __M);
4633}
4634
4635extern __inline __m128i
4636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4637_mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
4638{
4639 return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
4640 (__v4si) __B,
4641 (__v8hi)
fd79b414 4642 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4643 __M);
4644}
4645
4646extern __inline __m128i
4647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
20e363e4 4648_mm_mask_packus_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
936c0fe4
AI
4649 __m128i __B)
4650{
4651 return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
4652 (__v4si) __B,
4653 (__v8hi) __W, __M);
4654}
4655
4656extern __inline __m256i
4657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4658_mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
4659{
4660 return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
4661 (__v8si) __B,
4662 (__v16hi)
fd79b414 4663 _mm256_avx512_setzero_si256 (),
936c0fe4
AI
4664 __M);
4665}
4666
4667extern __inline __m256i
4668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4669_mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
4670 __m256i __B)
4671{
4672 return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
4673 (__v8si) __B,
4674 (__v16hi) __W,
4675 __M);
4676}
4677
4678extern __inline __m128i
4679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4680_mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
4681{
4682 return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
4683 (__v4si) __B,
4684 (__v8hi)
fd79b414 4685 _mm_avx512_setzero_si128 (),
936c0fe4
AI
4686 __M);
4687}
4688
4689extern __inline __m128i
4690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
20e363e4 4691_mm_mask_packs_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
936c0fe4
AI
4692 __m128i __B)
4693{
4694 return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
4695 (__v4si) __B,
4696 (__v8hi) __W, __M);
4697}
4698
eee5d6f5
AI
4699extern __inline __mmask16
4700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4701_mm_mask_cmpneq_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4702{
4703 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4704 (__v16qi) __Y, 4,
4705 (__mmask16) __M);
4706}
4707
4708extern __inline __mmask16
4709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4710_mm_mask_cmplt_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4711{
4712 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4713 (__v16qi) __Y, 1,
4714 (__mmask16) __M);
4715}
4716
4717extern __inline __mmask16
4718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4719_mm_mask_cmpge_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4720{
4721 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4722 (__v16qi) __Y, 5,
4723 (__mmask16) __M);
4724}
4725
4726extern __inline __mmask16
4727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4728_mm_mask_cmple_epu8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4729{
4730 return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
4731 (__v16qi) __Y, 2,
4732 (__mmask16) __M);
4733}
4734
4735extern __inline __mmask8
4736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737_mm_mask_cmpneq_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4738{
4739 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4740 (__v8hi) __Y, 4,
4741 (__mmask8) __M);
4742}
4743
4744extern __inline __mmask8
4745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746_mm_mask_cmplt_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4747{
4748 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4749 (__v8hi) __Y, 1,
4750 (__mmask8) __M);
4751}
4752
4753extern __inline __mmask8
4754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4755_mm_mask_cmpge_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4756{
4757 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4758 (__v8hi) __Y, 5,
4759 (__mmask8) __M);
4760}
4761
4762extern __inline __mmask8
4763__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4764_mm_mask_cmple_epu16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4765{
4766 return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
4767 (__v8hi) __Y, 2,
4768 (__mmask8) __M);
4769}
4770
4771extern __inline __mmask16
4772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4773_mm_mask_cmpneq_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4774{
4775 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4776 (__v16qi) __Y, 4,
4777 (__mmask16) __M);
4778}
4779
4780extern __inline __mmask16
4781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4782_mm_mask_cmplt_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4783{
4784 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4785 (__v16qi) __Y, 1,
4786 (__mmask16) __M);
4787}
4788
4789extern __inline __mmask16
4790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4791_mm_mask_cmpge_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4792{
4793 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4794 (__v16qi) __Y, 5,
4795 (__mmask16) __M);
4796}
4797
4798extern __inline __mmask16
4799__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4800_mm_mask_cmple_epi8_mask (__mmask16 __M, __m128i __X, __m128i __Y)
4801{
4802 return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
4803 (__v16qi) __Y, 2,
4804 (__mmask16) __M);
4805}
4806
4807extern __inline __mmask8
4808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4809_mm_mask_cmpneq_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4810{
4811 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4812 (__v8hi) __Y, 4,
4813 (__mmask8) __M);
4814}
4815
4816extern __inline __mmask8
4817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4818_mm_mask_cmplt_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4819{
4820 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4821 (__v8hi) __Y, 1,
4822 (__mmask8) __M);
4823}
4824
4825extern __inline __mmask8
4826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827_mm_mask_cmpge_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4828{
4829 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4830 (__v8hi) __Y, 5,
4831 (__mmask8) __M);
4832}
4833
4834extern __inline __mmask8
4835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4836_mm_mask_cmple_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
4837{
4838 return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
4839 (__v8hi) __Y, 2,
4840 (__mmask8) __M);
4841}
4842
18379eea 4843extern __inline __mmask32
eee5d6f5 4844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4845_mm256_mask_cmpneq_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
eee5d6f5 4846{
18379eea
JJ
4847 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4848 (__v32qi) __Y, 4,
4849 (__mmask32) __M);
eee5d6f5
AI
4850}
4851
18379eea 4852extern __inline __mmask32
eee5d6f5 4853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4854_mm256_mask_cmplt_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
eee5d6f5 4855{
18379eea
JJ
4856 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4857 (__v32qi) __Y, 1,
4858 (__mmask32) __M);
eee5d6f5
AI
4859}
4860
18379eea 4861extern __inline __mmask32
eee5d6f5 4862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4863_mm256_mask_cmpge_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
eee5d6f5 4864{
18379eea
JJ
4865 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4866 (__v32qi) __Y, 5,
4867 (__mmask32) __M);
eee5d6f5
AI
4868}
4869
18379eea 4870extern __inline __mmask32
eee5d6f5 4871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4872_mm256_mask_cmple_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
eee5d6f5 4873{
18379eea
JJ
4874 return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
4875 (__v32qi) __Y, 2,
4876 (__mmask32) __M);
eee5d6f5
AI
4877}
4878
18379eea 4879extern __inline __mmask16
eee5d6f5 4880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4881_mm256_mask_cmpneq_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
eee5d6f5 4882{
18379eea
JJ
4883 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4884 (__v16hi) __Y, 4,
4885 (__mmask16) __M);
eee5d6f5
AI
4886}
4887
18379eea 4888extern __inline __mmask16
eee5d6f5 4889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4890_mm256_mask_cmplt_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
eee5d6f5 4891{
18379eea
JJ
4892 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4893 (__v16hi) __Y, 1,
4894 (__mmask16) __M);
eee5d6f5
AI
4895}
4896
18379eea 4897extern __inline __mmask16
eee5d6f5 4898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4899_mm256_mask_cmpge_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
eee5d6f5 4900{
18379eea
JJ
4901 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4902 (__v16hi) __Y, 5,
4903 (__mmask16) __M);
eee5d6f5
AI
4904}
4905
18379eea 4906extern __inline __mmask16
eee5d6f5 4907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4908_mm256_mask_cmple_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
eee5d6f5 4909{
18379eea
JJ
4910 return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
4911 (__v16hi) __Y, 2,
4912 (__mmask16) __M);
eee5d6f5
AI
4913}
4914
18379eea 4915extern __inline __mmask32
eee5d6f5 4916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4917_mm256_mask_cmpneq_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
eee5d6f5 4918{
18379eea
JJ
4919 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4920 (__v32qi) __Y, 4,
4921 (__mmask32) __M);
eee5d6f5
AI
4922}
4923
18379eea 4924extern __inline __mmask32
eee5d6f5 4925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4926_mm256_mask_cmplt_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
eee5d6f5 4927{
18379eea
JJ
4928 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4929 (__v32qi) __Y, 1,
4930 (__mmask32) __M);
eee5d6f5
AI
4931}
4932
18379eea 4933extern __inline __mmask32
eee5d6f5 4934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4935_mm256_mask_cmpge_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
eee5d6f5 4936{
18379eea
JJ
4937 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4938 (__v32qi) __Y, 5,
4939 (__mmask32) __M);
eee5d6f5
AI
4940}
4941
18379eea 4942extern __inline __mmask32
eee5d6f5 4943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4944_mm256_mask_cmple_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
eee5d6f5 4945{
18379eea
JJ
4946 return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
4947 (__v32qi) __Y, 2,
4948 (__mmask32) __M);
eee5d6f5
AI
4949}
4950
18379eea 4951extern __inline __mmask16
eee5d6f5 4952__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4953_mm256_mask_cmpneq_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
eee5d6f5 4954{
18379eea
JJ
4955 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4956 (__v16hi) __Y, 4,
4957 (__mmask16) __M);
eee5d6f5
AI
4958}
4959
18379eea 4960extern __inline __mmask16
eee5d6f5 4961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4962_mm256_mask_cmplt_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
eee5d6f5 4963{
18379eea
JJ
4964 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4965 (__v16hi) __Y, 1,
4966 (__mmask16) __M);
eee5d6f5
AI
4967}
4968
18379eea 4969extern __inline __mmask16
eee5d6f5 4970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4971_mm256_mask_cmpge_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
eee5d6f5 4972{
18379eea
JJ
4973 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4974 (__v16hi) __Y, 5,
4975 (__mmask16) __M);
eee5d6f5
AI
4976}
4977
18379eea 4978extern __inline __mmask16
eee5d6f5 4979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
18379eea 4980_mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
eee5d6f5 4981{
18379eea
JJ
4982 return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
4983 (__v16hi) __Y, 2,
4984 (__mmask16) __M);
eee5d6f5
AI
4985}
4986
ca3bd377
HL
4987extern __inline short
4988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4989_mm_mask_reduce_add_epi16 (__mmask8 __M, __m128i __W)
4990{
4991 __W = _mm_maskz_mov_epi16 (__M, __W);
4992 _MM_REDUCE_OPERATOR_BASIC_EPI16 (+);
4993}
4994
4995extern __inline short
4996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4997_mm_mask_reduce_mul_epi16 (__mmask8 __M, __m128i __W)
4998{
e9529ff3 4999 __W = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (1), __M, __W);
ca3bd377
HL
5000 _MM_REDUCE_OPERATOR_BASIC_EPI16 (*);
5001}
5002
5003extern __inline short
5004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5005_mm_mask_reduce_and_epi16 (__mmask8 __M, __m128i __W)
5006{
e9529ff3 5007 __W = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (-1), __M, __W);
ca3bd377
HL
5008 _MM_REDUCE_OPERATOR_BASIC_EPI16 (&);
5009}
5010
5011extern __inline short
5012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5013_mm_mask_reduce_or_epi16 (__mmask8 __M, __m128i __W)
5014{
5015 __W = _mm_maskz_mov_epi16 (__M, __W);
5016 _MM_REDUCE_OPERATOR_BASIC_EPI16 (|);
5017}
5018
5019extern __inline short
5020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5021_mm_mask_reduce_max_epi16 (__mmask16 __M, __m128i __V)
5022{
e9529ff3
HJ
5023 __V = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (-32767-1), __M, __V);
5024 _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (avx512_max_epi16);
ca3bd377
HL
5025}
5026
5027extern __inline unsigned short
5028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5029_mm_mask_reduce_max_epu16 (__mmask16 __M, __m128i __V)
5030{
5031 __V = _mm_maskz_mov_epi16 (__M, __V);
e9529ff3 5032 _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (avx512_max_epu16);
ca3bd377
HL
5033}
5034
5035extern __inline short
5036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5037_mm_mask_reduce_min_epi16 (__mmask16 __M, __m128i __V)
5038{
e9529ff3
HJ
5039 __V = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (32767), __M, __V);
5040 _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (avx512_min_epi16);
ca3bd377
HL
5041}
5042
5043extern __inline unsigned short
5044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5045_mm_mask_reduce_min_epu16 (__mmask16 __M, __m128i __V)
5046{
e9529ff3
HJ
5047 __V = _mm_mask_mov_epi16 (_mm_avx512_set1_epi16 (-1), __M, __V);
5048 _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (avx512_min_epu16);
ca3bd377
HL
5049}
5050
5051extern __inline short
5052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5053_mm256_mask_reduce_add_epi16 (__mmask16 __M, __m256i __W)
5054{
5055 __W = _mm256_maskz_mov_epi16 (__M, __W);
e9529ff3 5056 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16 (+);
ca3bd377
HL
5057}
5058
5059extern __inline short
5060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5061_mm256_mask_reduce_mul_epi16 (__mmask16 __M, __m256i __W)
5062{
e9529ff3
HJ
5063 __W = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (1), __M, __W);
5064 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16 (*);
ca3bd377
HL
5065}
5066
5067extern __inline short
5068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5069_mm256_mask_reduce_and_epi16 (__mmask16 __M, __m256i __W)
5070{
e9529ff3
HJ
5071 __W = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (-1), __M, __W);
5072 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16 (&);
ca3bd377
HL
5073}
5074
5075extern __inline short
5076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5077_mm256_mask_reduce_or_epi16 (__mmask16 __M, __m256i __W)
5078{
5079 __W = _mm256_maskz_mov_epi16 (__M, __W);
e9529ff3 5080 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI16 (|);
ca3bd377
HL
5081}
5082
5083extern __inline short
5084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5085_mm256_mask_reduce_max_epi16 (__mmask16 __M, __m256i __V)
5086{
e9529ff3
HJ
5087 __V = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (-32767-1), __M, __V);
5088 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16);
ca3bd377
HL
5089}
5090
5091extern __inline unsigned short
5092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5093_mm256_mask_reduce_max_epu16 (__mmask16 __M, __m256i __V)
5094{
5095 __V = _mm256_maskz_mov_epi16 (__M, __V);
e9529ff3 5096 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16);
ca3bd377
HL
5097}
5098
5099extern __inline short
5100__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5101_mm256_mask_reduce_min_epi16 (__mmask16 __M, __m256i __V)
5102{
e9529ff3
HJ
5103 __V = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (32767), __M, __V);
5104 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16);
ca3bd377
HL
5105}
5106
5107extern __inline unsigned short
5108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5109_mm256_mask_reduce_min_epu16 (__mmask16 __M, __m256i __V)
5110{
e9529ff3
HJ
5111 __V = _mm256_mask_mov_epi16 (_mm256_avx512_set1_epi16 (-1), __M, __V);
5112 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16);
ca3bd377
HL
5113}
5114
5115extern __inline char
5116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5117_mm_mask_reduce_add_epi8 (__mmask16 __M, __m128i __W)
5118{
5119 __W = _mm_maskz_mov_epi8 (__M, __W);
5120 _MM_REDUCE_OPERATOR_BASIC_EPI8 (+);
5121}
5122
5123extern __inline char
5124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5125_mm_mask_reduce_mul_epi8 (__mmask16 __M, __m128i __W)
5126{
e9529ff3 5127 __W = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (1), __M, __W);
ca3bd377
HL
5128 _MM_REDUCE_OPERATOR_BASIC_EPI8 (*);
5129}
5130
5131extern __inline char
5132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5133_mm_mask_reduce_and_epi8 (__mmask16 __M, __m128i __W)
5134{
e9529ff3 5135 __W = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (-1), __M, __W);
ca3bd377
HL
5136 _MM_REDUCE_OPERATOR_BASIC_EPI8 (&);
5137}
5138
5139extern __inline char
5140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5141_mm_mask_reduce_or_epi8 (__mmask16 __M, __m128i __W)
5142{
5143 __W = _mm_maskz_mov_epi8 (__M, __W);
5144 _MM_REDUCE_OPERATOR_BASIC_EPI8 (|);
5145}
5146
5147extern __inline signed char
5148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149_mm_mask_reduce_max_epi8 (__mmask16 __M, __m128i __V)
5150{
e9529ff3
HJ
5151 __V = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (-127-1), __M, __V);
5152 _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (avx512_max_epi8);
ca3bd377
HL
5153}
5154
5155extern __inline unsigned char
5156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5157_mm_mask_reduce_max_epu8 (__mmask16 __M, __m128i __V)
5158{
5159 __V = _mm_maskz_mov_epi8 (__M, __V);
e9529ff3 5160 _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (avx512_max_epu8);
ca3bd377
HL
5161}
5162
5163extern __inline signed char
5164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5165_mm_mask_reduce_min_epi8 (__mmask16 __M, __m128i __V)
5166{
e9529ff3
HJ
5167 __V = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (127), __M, __V);
5168 _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (avx512_min_epi8);
ca3bd377
HL
5169}
5170
5171extern __inline unsigned char
5172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5173_mm_mask_reduce_min_epu8 (__mmask16 __M, __m128i __V)
5174{
e9529ff3
HJ
5175 __V = _mm_mask_mov_epi8 (_mm_avx512_set1_epi8 (-1), __M, __V);
5176 _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (avx512_min_epu8);
ca3bd377
HL
5177}
5178
5179extern __inline char
5180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5181_mm256_mask_reduce_add_epi8 (__mmask32 __M, __m256i __W)
5182{
5183 __W = _mm256_maskz_mov_epi8 (__M, __W);
e9529ff3 5184 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8 (+);
ca3bd377
HL
5185}
5186
5187extern __inline char
5188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5189_mm256_mask_reduce_mul_epi8 (__mmask32 __M, __m256i __W)
5190{
e9529ff3
HJ
5191 __W = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (1), __M, __W);
5192 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8 (*);
ca3bd377
HL
5193}
5194
5195extern __inline char
5196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197_mm256_mask_reduce_and_epi8 (__mmask32 __M, __m256i __W)
5198{
e9529ff3
HJ
5199 __W = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (-1), __M, __W);
5200 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8 (&);
ca3bd377
HL
5201}
5202
5203extern __inline char
5204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5205_mm256_mask_reduce_or_epi8 (__mmask32 __M, __m256i __W)
5206{
5207 __W = _mm256_maskz_mov_epi8 (__M, __W);
e9529ff3 5208 _MM256_AVX512_REDUCE_OPERATOR_BASIC_EPI8 (|);
ca3bd377
HL
5209}
5210
5211extern __inline signed char
5212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5213_mm256_mask_reduce_max_epi8 (__mmask32 __M, __m256i __V)
5214{
e9529ff3
HJ
5215 __V = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (-127-1), __M, __V);
5216 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8);
ca3bd377
HL
5217}
5218
5219extern __inline unsigned char
5220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5221_mm256_mask_reduce_max_epu8 (__mmask32 __M, __m256i __V)
5222{
5223 __V = _mm256_maskz_mov_epi8 (__M, __V);
e9529ff3 5224 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8);
ca3bd377
HL
5225}
5226
5227extern __inline signed char
5228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5229_mm256_mask_reduce_min_epi8 (__mmask32 __M, __m256i __V)
5230{
e9529ff3
HJ
5231 __V = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (127), __M, __V);
5232 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8);
ca3bd377
HL
5233}
5234
5235extern __inline unsigned char
5236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5237_mm256_mask_reduce_min_epu8 (__mmask32 __M, __m256i __V)
5238{
e9529ff3
HJ
5239 __V = _mm256_mask_mov_epi8 (_mm256_avx512_set1_epi8 (-1), __M, __V);
5240 _MM256_AVX512_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8);
ca3bd377
HL
5241}
5242
/* Undo the target-option override made at the top of this header:
   __DISABLE_AVX512VLBW__ is defined there only when push_options was
   issued, so the options are popped only in that case.  */
#ifdef __DISABLE_AVX512VLBW__
#undef __DISABLE_AVX512VLBW__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512VLBW__ */
5247
5248#endif /* _AVX512VLBWINTRIN_H_INCLUDED */