]>
Commit | Line | Data |
---|---|---|
83ffe9cd | 1 | /* Copyright (C) 2014-2023 Free Software Foundation, Inc. |
936c0fe4 AI |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify | |
6 | it under the terms of the GNU General Public License as published by | |
7 | the Free Software Foundation; either version 3, or (at your option) | |
8 | any later version. | |
9 | ||
10 | GCC is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | Under Section 7 of GPL version 3, you are granted additional | |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
23 | ||
24 | #ifndef _IMMINTRIN_H_INCLUDED | |
25 | #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." | |
26 | #endif | |
27 | ||
28 | #ifndef _AVX512VLDQINTRIN_H_INCLUDED | |
29 | #define _AVX512VLDQINTRIN_H_INCLUDED | |
30 | ||
1ce82f56 HJ |
31 | #if !defined(__AVX512VL__) || !defined(__AVX512DQ__) |
32 | #pragma GCC push_options | |
33 | #pragma GCC target("avx512vl,avx512dq") | |
34 | #define __DISABLE_AVX512VLDQ__ | |
35 | #endif /* __AVX512VLDQ__ */ | |
36 | ||
936c0fe4 AI |
/* Truncating conversions: packed double -> packed signed 64-bit ints
   (VCVTTPD2QQ builtins).  Unmasked forms pass an all-ones mask and a
   dummy zero vector; _mask forms merge into __W under write mask __U;
   _maskz forms supply a zero vector so unselected lanes become zero.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epi64 (__m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) -1);
}

/* Merge-masked 256-bit form: lanes not selected by __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
						     (__v4di) __W,
						     (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U);
}

/* 128-bit unmasked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epi64 (__m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) -1);
}

/* 128-bit merge-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
						     (__v2di) __W,
						     (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) __U);
}
94 | ||
/* Truncating conversions: packed double -> packed unsigned 64-bit ints
   (VCVTTPD2UQQ builtins).  Same unmasked/_mask/_maskz pattern as the
   signed cvttpd_epi64 family above.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttpd_epu64 (__m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
						      (__v4di)
						      _mm256_setzero_si256 (),
						      (__mmask8) -1);
}

/* Merge-masked 256-bit form: lanes not selected by __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
						      (__v4di) __W,
						      (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
						      (__v4di)
						      _mm256_setzero_si256 (),
						      (__mmask8) __U);
}

/* 128-bit unmasked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epu64 (__m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
						      (__v2di)
						      _mm_setzero_si128 (),
						      (__mmask8) -1);
}

/* 128-bit merge-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
						      (__v2di) __W,
						      (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
						      (__v2di)
						      _mm_setzero_si128 (),
						      (__mmask8) __U);
}
152 | ||
/* Rounding conversions: packed double -> packed signed 64-bit ints
   (VCVTPD2QQ builtins; rounding per the current MXCSR mode, unlike the
   truncating cvttpd forms).  Unmasked/_mask/_maskz pattern as above.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_epi64 (__m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
						    (__v4di)
						    _mm256_setzero_si256 (),
						    (__mmask8) -1);
}

/* Merge-masked 256-bit form: lanes not selected by __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
						    (__v4di) __W,
						    (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
						    (__v4di)
						    _mm256_setzero_si256 (),
						    (__mmask8) __U);
}

/* 128-bit unmasked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_epi64 (__m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
						    (__v2di)
						    _mm_setzero_si128 (),
						    (__mmask8) -1);
}

/* 128-bit merge-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
						    (__v2di) __W,
						    (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
						    (__v2di)
						    _mm_setzero_si128 (),
						    (__mmask8) __U);
}
210 | ||
/* Rounding conversions: packed double -> packed unsigned 64-bit ints
   (VCVTPD2UQQ builtins).  Unmasked/_mask/_maskz pattern as above.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtpd_epu64 (__m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) -1);
}

/* Merge-masked 256-bit form: lanes not selected by __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
						     (__v4di) __W,
						     (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U);
}

/* 128-bit unmasked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_epu64 (__m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) -1);
}

/* 128-bit merge-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
						     (__v2di) __W,
						     (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
{
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) __U);
}
268 | ||
/* Truncating conversions: packed float -> packed signed 64-bit ints
   (VCVTTPS2QQ builtins).  The 256-bit forms widen four floats from the
   low __m128 into four 64-bit lanes; the 128-bit forms use only the low
   two floats of __A.  Unmasked/_mask/_maskz pattern as above.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttps_epi64 (__m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) -1);
}

/* Merge-masked 256-bit form: lanes not selected by __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
						     (__v4di) __W,
						     (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U);
}

/* 128-bit unmasked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epi64 (__m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) -1);
}

/* 128-bit merge-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
						     (__v2di) __W,
						     (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) __U);
}
326 | ||
/* Truncating conversions: packed float -> packed unsigned 64-bit ints
   (VCVTTPS2UQQ builtins).  Same widening and masking pattern as the
   signed cvttps_epi64 family above.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttps_epu64 (__m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
						      (__v4di)
						      _mm256_setzero_si256 (),
						      (__mmask8) -1);
}

/* Merge-masked 256-bit form: lanes not selected by __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
						      (__v4di) __W,
						      (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
						      (__v4di)
						      _mm256_setzero_si256 (),
						      (__mmask8) __U);
}

/* 128-bit unmasked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epu64 (__m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
						      (__v2di)
						      _mm_setzero_si128 (),
						      (__mmask8) -1);
}

/* 128-bit merge-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
						      (__v2di) __W,
						      (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
						      (__v2di)
						      _mm_setzero_si128 (),
						      (__mmask8) __U);
}
384 | ||
/* Broadcast the 128-bit pair of doubles in __A into both halves of a
   256-bit result (VBROADCASTF64X2).  The unmasked form passes an
   undefined pass-through vector, since the all-ones mask selects every
   lane.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_f64x2 (__m128d __A)
{
  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
							   __A,
							   (__v4df)_mm256_undefined_pd(),
							   (__mmask8) -1);
}

/* Merge-masked form: lanes not selected by __M keep the value in __O.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
{
  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
							   __A,
							   (__v4df)
							   __O, __M);
}
404 | ||
405 | extern __inline __m256d | |
406 | __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
407 | _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) | |
408 | { | |
409 | return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) | |
410 | __A, | |
411 | (__v4df) | |
412 | _mm256_setzero_ps (), | |
413 | __M); | |
414 | } | |
415 | ||
/* Broadcast the 128-bit pair of 64-bit integers in __A into both halves
   of a 256-bit result (VBROADCASTI64X2).  Unmasked/_mask/_maskz pattern
   as for the f64x2 forms above.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_i64x2 (__m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
							   __A,
							   (__v4di)_mm256_undefined_si256(),
							   (__mmask8) -1);
}

/* Merge-masked form: lanes not selected by __M keep __O.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
							   __A,
							   (__v4di)
							   __O, __M);
}

/* Zero-masked form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
							   __A,
							   (__v4di)
							   _mm256_setzero_si256 (),
							   __M);
}
446 | ||
/* Broadcast the low pair of floats in __A across a 256-bit result
   (VBROADCASTF32X2 builtin).  Unmasked/_mask/_maskz pattern as above.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_f32x2 (__m128 __A)
{
  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
							  (__v8sf)_mm256_undefined_ps(),
							  (__mmask8) -1);
}

/* Merge-masked form: lanes not selected by __M keep __O.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
{
  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
							  (__v8sf) __O,
							  __M);
}

/* Zero-masked form.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
{
  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
							  (__v8sf)
							  _mm256_setzero_ps (),
							  __M);
}
474 | ||
/* Broadcast the low pair of 32-bit integers in __A across a 256-bit
   result (VBROADCASTI32X2 builtin).  Unmasked/_mask/_maskz pattern as
   above.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcast_i32x2 (__m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
							   __A,
							   (__v8si)_mm256_undefined_si256(),
							   (__mmask8) -1);
}

/* Merge-masked form: lanes not selected by __M keep __O.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
							   __A,
							   (__v8si)
							   __O, __M);
}

/* Zero-masked form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
							   __A,
							   (__v8si)
							   _mm256_setzero_si256 (),
							   __M);
}
505 | ||
/* 128-bit variant: broadcast the low pair of 32-bit integers in __A
   across a 128-bit result (VBROADCASTI32X2 builtin, 128-bit form).
   Unmasked/_mask/_maskz pattern as above.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcast_i32x2 (__m128i __A)
{
  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
							   __A,
							   (__v4si)_mm_undefined_si128(),
							   (__mmask8) -1);
}

/* Merge-masked form: lanes not selected by __M keep __O.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
{
  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
							   __A,
							   (__v4si)
							   __O, __M);
}

/* Zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
							   __A,
							   (__v4si)
							   _mm_setzero_si128 (),
							   __M);
}
536 | ||
/* Lane-wise 64-bit multiply (VPMULLQ).  The unmasked forms are plain
   vector multiplies so the compiler can fold/optimize them; the unsigned
   element types (__v4du/__v2du) make wraparound well-defined instead of
   signed-overflow UB — the low 64 bits of the product are the same
   either way.  The masked forms go through the pmullq builtins.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mullo_epi64 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4du) __A * (__v4du) __B);
}

/* Merge-masked 256-bit form: lanes not selected by __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
			 __m256i __B)
{
  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
						  (__v4di) __B,
						  (__v4di) __W,
						  (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
						  (__v4di) __B,
						  (__v4di)
						  _mm256_setzero_si256 (),
						  (__mmask8) __U);
}

/* 128-bit unmasked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v2du) __A * (__v2du) __B);
}

/* 128-bit merge-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
		      __m128i __B)
{
  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
						  (__v2di) __B,
						  (__v2di) __W,
						  (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
						  (__v2di) __B,
						  (__v2di)
						  _mm_setzero_si128 (),
						  (__mmask8) __U);
}
594 | ||
/* Masked bitwise AND-NOT of packed doubles (VANDNPD builtins: each
   result lane is ~__A & __B, per the ANDN operand convention).  _mask
   forms merge into __W under __U; _maskz forms zero unselected lanes.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
		       __m256d __B)
{
  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
						  (__v4df) __B,
						  (__v4df) __W,
						  (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
						  (__v4df) __B,
						  (__v4df)
						  _mm256_setzero_pd (),
						  (__mmask8) __U);
}

/* 128-bit merge-masked form.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
		    __m128d __B)
{
  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
						  (__v2df) __B,
						  (__v2df) __W,
						  (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
						  (__v2df) __B,
						  (__v2df)
						  _mm_setzero_pd (),
						  (__mmask8) __U);
}
638 | ||
/* Masked bitwise AND-NOT of packed floats (VANDNPS builtins).  Same
   _mask/_maskz pattern as the andnot_pd family above.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
		       __m256 __B)
{
  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
						 (__v8sf) __W,
						 (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
						 (__v8sf)
						 _mm256_setzero_ps (),
						 (__mmask8) __U);
}

/* 128-bit merge-masked form.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
						 (__v4sf) __W,
						 (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
						 (__v4sf)
						 _mm_setzero_ps (),
						 (__mmask8) __U);
}
681 | ||
/* Rounding conversions: packed float -> packed signed 64-bit ints
   (VCVTPS2QQ builtins; rounding per the current MXCSR mode).  Same
   widening and masking pattern as the cvttps_epi64 family.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_epi64 (__m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
						    (__v4di)
						    _mm256_setzero_si256 (),
						    (__mmask8) -1);
}

/* Merge-masked 256-bit form: lanes not selected by __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
						    (__v4di) __W,
						    (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
						    (__v4di)
						    _mm256_setzero_si256 (),
						    (__mmask8) __U);
}

/* 128-bit unmasked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epi64 (__m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
						    (__v2di)
						    _mm_setzero_si128 (),
						    (__mmask8) -1);
}

/* 128-bit merge-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
						    (__v2di) __W,
						    (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
						    (__v2di)
						    _mm_setzero_si128 (),
						    (__mmask8) __U);
}
739 | ||
/* Rounding conversions: packed float -> packed unsigned 64-bit ints
   (VCVTPS2UQQ builtins).  Same pattern as the cvtps_epi64 family.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_epu64 (__m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) -1);
}

/* Merge-masked 256-bit form: lanes not selected by __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
						     (__v4di) __W,
						     (__mmask8) __U);
}

/* Zero-masked 256-bit form.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
{
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U);
}

/* 128-bit unmasked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epu64 (__m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) -1);
}

/* 128-bit merge-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
						     (__v2di) __W,
						     (__mmask8) __U);
}

/* 128-bit zero-masked form.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
{
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
						     (__v2di)
						     _mm_setzero_si128 (),
						     (__mmask8) __U);
}
797 | ||
/* Narrowing conversions: packed signed 64-bit ints -> packed float
   (VCVTQQ2PS builtins).  Both widths return an __m128: the 256-bit
   source yields four floats, the 128-bit source yields two floats in
   the low lanes.  Unmasked/_mask/_maskz pattern as above.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_ps (__m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) -1);
}

/* Merge-masked form: lanes not selected by __U keep __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
						   (__v4sf) __W,
						   (__mmask8) __U);
}

/* Zero-masked form.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U);
}

/* 128-bit source, unmasked.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_ps (__m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) -1);
}

/* 128-bit source, merge-masked.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
						   (__v4sf) __W,
						   (__mmask8) __U);
}

/* 128-bit source, zero-masked.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
						   (__v4sf)
						   _mm_setzero_ps (),
						   (__mmask8) __U);
}
855 | ||
/* Narrowing conversions: packed unsigned 64-bit ints -> packed float
   (VCVTUQQ2PS builtins).  Same shapes and masking pattern as the
   signed cvtepi64_ps family above.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_ps (__m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) -1);
}

/* Merge-masked form: lanes not selected by __U keep __W.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

/* Zero-masked form.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}

/* 128-bit source, unmasked.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_ps (__m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) -1);
}

/* 128-bit source, merge-masked.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

/* 128-bit source, zero-masked.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
{
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}
913 | ||
/* Signed 64-bit integer -> double-precision float conversions, 256- and
   128-bit widths (builtins __builtin_ia32_cvtqq2pd256/128_mask).  Each
   triple is unmasked / merge-masked / zero-masked; only the pass-through
   vector and write-mask arguments differ.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi64_pd (__m256i __A)
{
  /* Unmasked: all-ones mask, zero pass-through (unused).  */
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
                                                    (__v4df)
                                                    _mm256_setzero_pd (),
                                                    (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  /* Merge-masked: __W supplies the pass-through lanes.  */
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
                                                    (__v4df) __W,
                                                    (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
{
  /* Zero-masked: zero vector pass-through.  */
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
                                                    (__v4df)
                                                    _mm256_setzero_pd (),
                                                    (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi64_pd (__m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
                                                    (__v2df) __W,
                                                    (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
{
  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U);
}
971 | ||
/* 256-bit unsigned 64-bit integer -> double-precision float conversions
   (builtin __builtin_ia32_cvtuqq2pd256_mask): unmasked, merge-masked and
   zero-masked variants.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu64_pd (__m256i __A)
{
  /* Unmasked: all-ones mask, zero pass-through (unused).  */
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
                                                     (__v4df)
                                                     _mm256_setzero_pd (),
                                                     (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
{
  /* Merge-masked: __W supplies the pass-through lanes.  */
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
                                                     (__v4df) __W,
                                                     (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
{
  /* Zero-masked: zero vector pass-through.  */
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
                                                     (__v4df)
                                                     _mm256_setzero_pd (),
                                                     (__mmask8) __U);
}
1000 | ||
/* Masked bitwise AND of packed double/single-precision vectors (builtins
   __builtin_ia32_andpd256/128_mask, __builtin_ia32_andps256/128_mask).
   mask_ variants merge through __W; maskz_ variants zero masked-off
   lanes via a zero pass-through vector.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
                    __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
                                                 (__v4df) __B,
                                                 (__v4df) __W,
                                                 (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
                                                 (__v4df) __B,
                                                 (__v4df)
                                                 _mm256_setzero_pd (),
                                                 (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
                                                 (__v2df) __B,
                                                 (__v2df) __W,
                                                 (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
                                                 (__v2df) __B,
                                                 (__v2df)
                                                 _mm_setzero_pd (),
                                                 (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
                                                (__v8sf) __B,
                                                (__v8sf) __W,
                                                (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
                                                (__v8sf) __B,
                                                (__v8sf)
                                                _mm256_setzero_ps (),
                                                (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
                                                (__v4sf) __B,
                                                (__v4sf) __W,
                                                (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
                                                (__v4sf) __B,
                                                (__v4sf)
                                                _mm_setzero_ps (),
                                                (__mmask8) __U);
}
1085 | ||
/* 128-bit unsigned 64-bit integer -> double-precision float conversions
   (builtin __builtin_ia32_cvtuqq2pd128_mask): unmasked, merge-masked and
   zero-masked variants.  NOTE(review): this triple sits after the and_pd/
   and_ps group rather than beside _mm256_cvtepu64_pd — ordering kept
   as-is to preserve the file layout.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu64_pd (__m128i __A)
{
  /* Unmasked: all-ones mask, zero pass-through (unused).  */
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
                                                     (__v2df)
                                                     _mm_setzero_pd (),
                                                     (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
{
  /* Merge-masked: __W supplies the pass-through lanes.  */
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
                                                     (__v2df) __W,
                                                     (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
{
  /* Zero-masked: zero vector pass-through.  */
  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
                                                     (__v2df)
                                                     _mm_setzero_pd (),
                                                     (__mmask8) __U);
}
1114 | ||
/* Masked bitwise XOR of packed double/single-precision vectors (builtins
   __builtin_ia32_xorpd256/128_mask, __builtin_ia32_xorps256/128_mask).
   mask_ variants merge through __W; maskz_ variants use a zero
   pass-through vector.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
                    __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
                                                 (__v4df) __B,
                                                 (__v4df) __W,
                                                 (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
                                                 (__v4df) __B,
                                                 (__v4df)
                                                 _mm256_setzero_pd (),
                                                 (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
                                                 (__v2df) __B,
                                                 (__v2df) __W,
                                                 (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
                                                 (__v2df) __B,
                                                 (__v2df)
                                                 _mm_setzero_pd (),
                                                 (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
                                                (__v8sf) __B,
                                                (__v8sf) __W,
                                                (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
                                                (__v8sf) __B,
                                                (__v8sf)
                                                _mm256_setzero_ps (),
                                                (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
                                                (__v4sf) __B,
                                                (__v4sf) __W,
                                                (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
                                                (__v4sf) __B,
                                                (__v4sf)
                                                _mm_setzero_ps (),
                                                (__mmask8) __U);
}
1199 | ||
/* Masked bitwise OR of packed double/single-precision vectors (builtins
   __builtin_ia32_orpd256/128_mask, __builtin_ia32_orps256/128_mask).
   mask_ variants merge through __W; maskz_ variants use a zero
   pass-through vector.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
                                                (__v4df) __B,
                                                (__v4df) __W,
                                                (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
{
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
                                                (__v4df) __B,
                                                (__v4df)
                                                _mm256_setzero_pd (),
                                                (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
                                                (__v2df) __B,
                                                (__v2df) __W,
                                                (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
                                                (__v2df) __B,
                                                (__v2df)
                                                _mm_setzero_pd (),
                                                (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
                                               (__v8sf) __B,
                                               (__v8sf) __W,
                                               (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
{
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
                                               (__v8sf) __B,
                                               (__v8sf)
                                               _mm256_setzero_ps (),
                                               (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
                                               (__v4sf) __B,
                                               (__v4sf) __W,
                                               (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
                                               (__v4sf) __B,
                                               (__v4sf)
                                               _mm_setzero_ps (),
                                               (__mmask8) __U);
}
1283 | ||
/* Mask <-> vector moves.  movm_* expands a __mmask8 into a vector via the
   cvtmask2d/cvtmask2q builtins; movepi*_mask goes the other way via the
   cvtd2mask/cvtq2mask builtins.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi32 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi32 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movm_epi64 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movm_epi64 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi32_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi32_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_movepi64_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movepi64_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
}
1339 | ||
1340 | #ifdef __OPTIMIZE__ | |
/* 128-bit lane extraction from 256-bit vectors (builtins
   __builtin_ia32_extractf64x2_256_mask / extracti64x2_256_mask).
   Defined inside #ifdef __OPTIMIZE__ (see above): __imm must fold to a
   compile-time constant for the builtin's immediate operand.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extractf64x2_pd (__m256d __A, const int __imm)
{
  /* Unmasked: all-ones mask, zero pass-through (unused).  */
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
                                                         __imm,
                                                         (__v2df)
                                                         _mm_setzero_pd (),
                                                         (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
                             const int __imm)
{
  /* Merge-masked: __W supplies the pass-through lanes.  */
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
                                                         __imm,
                                                         (__v2df) __W,
                                                         (__mmask8)
                                                         __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
                              const int __imm)
{
  /* Zero-masked: zero vector pass-through.  */
  return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
                                                         __imm,
                                                         (__v2df)
                                                         _mm_setzero_pd (),
                                                         (__mmask8)
                                                         __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
                                                         __imm,
                                                         (__v2di)
                                                         _mm_setzero_si128 (),
                                                         (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
                                const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
                                                         __imm,
                                                         (__v2di) __W,
                                                         (__mmask8)
                                                         __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
                                 const int __imm)
{
  return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
                                                         __imm,
                                                         (__v2di)
                                                         _mm_setzero_si128 (),
                                                         (__mmask8)
                                                         __U);
}
1412 | ||
/* Reduce intrinsics (builtins __builtin_ia32_reducepd256/128_mask and
   reduceps256/128_mask).  __B is passed through as the builtin's
   immediate control operand; mask/maskz variants follow the usual
   merge / zero pass-through pattern.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_pd (__m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
                                                    (__v4df)
                                                    _mm256_setzero_pd (),
                                                    (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
                                                    (__v4df) __W,
                                                    (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
{
  return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
                                                    (__v4df)
                                                    _mm256_setzero_pd (),
                                                    (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_pd (__m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
{
  return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ps (__m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
                                                   (__v8sf)
                                                   _mm256_setzero_ps (),
                                                   (__mmask8) -1);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
                                                   (__v8sf) __W,
                                                   (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
{
  return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
                                                   (__v8sf)
                                                   _mm256_setzero_ps (),
                                                   (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ps (__m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
{
  return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U);
}
1528 | ||
/* Range intrinsics (builtins __builtin_ia32_rangepd256/128_mask and
   rangeps256/128_mask).  __C is the builtin's immediate control operand;
   mask/maskz variants follow the usual merge / zero pass-through
   pattern.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_pd (__m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
                                                   (__v4df) __B, __C,
                                                   (__v4df)
                                                   _mm256_setzero_pd (),
                                                   (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_pd (__m256d __W, __mmask8 __U,
                      __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
                                                   (__v4df) __B, __C,
                                                   (__v4df) __W,
                                                   (__mmask8) __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
{
  return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
                                                   (__v4df) __B, __C,
                                                   (__v4df)
                                                   _mm256_setzero_pd (),
                                                   (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_pd (__m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
                                                   (__v2df) __B, __C,
                                                   (__v2df)
                                                   _mm_setzero_pd (),
                                                   (__mmask8) -1);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_pd (__m128d __W, __mmask8 __U,
                   __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
                                                   (__v2df) __B, __C,
                                                   (__v2df) __W,
                                                   (__mmask8) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
{
  return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
                                                   (__v2df) __B, __C,
                                                   (__v2df)
                                                   _mm_setzero_pd (),
                                                   (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_range_ps (__m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
                                                  (__v8sf) __B, __C,
                                                  (__v8sf)
                                                  _mm256_setzero_ps (),
                                                  (__mmask8) -1);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
                      int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
                                                  (__v8sf) __B, __C,
                                                  (__v8sf) __W,
                                                  (__mmask8) __U);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
{
  return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
                                                  (__v8sf) __B, __C,
                                                  (__v8sf)
                                                  _mm256_setzero_ps (),
                                                  (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_range_ps (__m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
                                                  (__v4sf) __B, __C,
                                                  (__v4sf)
                                                  _mm_setzero_ps (),
                                                  (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_range_ps (__m128 __W, __mmask8 __U,
                   __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
                                                  (__v4sf) __B, __C,
                                                  (__v4sf) __W,
                                                  (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
{
  return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
                                                  (__v4sf) __B, __C,
                                                  (__v4sf)
                                                  _mm_setzero_ps (),
                                                  (__mmask8) __U);
}
1660 | ||
/* fpclass intrinsics: classify packed FP elements, producing a __mmask8
   (builtins __builtin_ia32_fpclasspd256/128_mask, fpclassps256/128_mask).
   __imm is the builtin's immediate class-selector operand; mask_ variants
   pass __U through, unmasked variants pass an all-ones mask.  */
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
                             const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
                                                      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_pd_mask (__m256d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
                                                      __imm,
                                                      (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
                                                      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_ps_mask (__m256 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
                                                      __imm,
                                                      (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
                                                      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_pd_mask (__m128d __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
                                                      __imm,
                                                      (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
                                                      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_ps_mask (__m128 __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
                                                      __imm,
                                                      (__mmask8) -1);
}
1729 | ||
/* 128-bit lane insertion into 256-bit vectors (builtins
   __builtin_ia32_inserti64x2_256_mask / insertf64x2_256_mask).
   __imm is the builtin's immediate lane-selector operand; mask/maskz
   variants follow the usual merge / zero pass-through pattern.
   NOTE(review): the _mm256_maskz_insertf64x2 sibling continues past the
   end of this view.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
                                                        (__v2di) __B,
                                                        __imm,
                                                        (__v4di)
                                                        _mm256_setzero_si256 (),
                                                        (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
                         __m128i __B, const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
                                                        (__v2di) __B,
                                                        __imm,
                                                        (__v4di) __W,
                                                        (__mmask8)
                                                        __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
                          const int __imm)
{
  return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
                                                        (__v2di) __B,
                                                        __imm,
                                                        (__v4di)
                                                        _mm256_setzero_si256 (),
                                                        (__mmask8)
                                                        __U);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
                                                        (__v2df) __B,
                                                        __imm,
                                                        (__v4df)
                                                        _mm256_setzero_pd (),
                                                        (__mmask8) -1);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
                         __m128d __B, const int __imm)
{
  return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
                                                        (__v2df) __B,
                                                        __imm,
                                                        (__v4df) __W,
                                                        (__mmask8)
                                                        __U);
}
1793 | ||
1794 | extern __inline __m256d | |
1795 | __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) | |
1796 | _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B, | |
1797 | const int __imm) | |
1798 | { | |
1799 | return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A, | |
1800 | (__v2df) __B, | |
1801 | __imm, | |
1802 | (__v4df) | |
1803 | _mm256_setzero_pd (), | |
1804 | (__mmask8) | |
1805 | __U); | |
1806 | } | |
1807 | ||
#else
/* Non-__OPTIMIZE__ fallback: without optimization the inline functions
   above cannot guarantee that their immediate argument folds to a
   compile-time constant, so each immediate-operand intrinsic is defined
   as a macro that passes the immediate straight to the builtin.  */

/* 128-bit lane insertion into a 256-bit vector (double / qword forms),
   in unmasked, merge-masked (W, U) and zero-masked (U) variants.  */
#define _mm256_insertf64x2(X, Y, C) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C), \
    (__v4df)(__m256d)_mm256_setzero_pd(), \
    (__mmask8)-1))

#define _mm256_mask_insertf64x2(W, U, X, Y, C) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C), \
    (__v4df)(__m256d)(W), \
    (__mmask8)(U)))

#define _mm256_maskz_insertf64x2(U, X, Y, C) \
  ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
    (__v2df)(__m128d) (Y), (int) (C), \
    (__v4df)(__m256d)_mm256_setzero_pd(), \
    (__mmask8)(U)))

#define _mm256_inserti64x2(X, Y, C) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C), \
    (__v4di)(__m256i)_mm256_setzero_si256 (), \
    (__mmask8)-1))

#define _mm256_mask_inserti64x2(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C), \
    (__v4di)(__m256i)(W), \
    (__mmask8)(U)))

#define _mm256_maskz_inserti64x2(U, X, Y, C) \
  ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
    (__v2di)(__m128i) (Y), (int) (C), \
    (__v4di)(__m256i)_mm256_setzero_si256 (), \
    (__mmask8)(U)))

/* 128-bit lane extraction from a 256-bit vector (double / qword forms).  */
#define _mm256_extractf64x2_pd(X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_extractf64x2_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))

#define _mm256_maskz_extractf64x2_pd(U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))

#define _mm256_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))

#define _mm256_mask_extracti64x2_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))

#define _mm256_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))

/* VREDUCEPD / VREDUCEPS with an immediate rounding/control operand,
   256-bit and 128-bit, unmasked / merge-masked / zero-masked.  */
#define _mm256_reduce_pd(A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
    (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_reduce_pd(A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
    (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
    (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

#define _mm256_reduce_ps(A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
    (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
    (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_reduce_ps(A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
    (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
    (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

/* VRANGEPD / VRANGEPS: immediate-selected min/max range operation.  */
#define _mm256_range_pd(A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
    (__v4df)(__m256d)(B), (int)(C), \
    (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
    (__v4df)(__m256d)(B), (int)(C), \
    (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

#define _mm_range_pd(A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), (int)(C), \
    (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm256_range_ps(A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
    (__v8sf)(__m256)(B), (int)(C), \
    (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))

#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
    (__v8sf)(__m256)(B), (int)(C), \
    (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
    (__v8sf)(__m256)(B), (int)(C), \
    (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))

#define _mm_range_ps(A, B, C) \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), (int)(C), \
    (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), (int)(C), \
    (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
    (__v4sf)(__m128)(B), (int)(C), \
    (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))

#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
    (__v4df)(__m256d)(B), (int)(C), \
    (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), (int)(C), \
    (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
    (__v2df)(__m128d)(B), (int)(C), \
    (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

/* VFPCLASSPD / VFPCLASSPS: classify elements against the immediate's
   category bits, producing a mask register result.  */
#define _mm256_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm256_mask_fpclass_ps_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_pd_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm_mask_fpclass_ps_mask(u, X, C) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
    (int) (C),(__mmask8)(u)))

#define _mm256_fpclass_pd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
    (int) (C),(__mmask8)-1))

#define _mm256_fpclass_ps_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_pd_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
    (int) (C),(__mmask8)-1))

#define _mm_fpclass_ps_mask(X, C) \
  ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
    (int) (C),(__mmask8)-1))

#endif
2010 | ||
cfb1dde5 HJ |
2011 | #ifdef __DISABLE_AVX512VLDQ__ |
2012 | #undef __DISABLE_AVX512VLDQ__ | |
2013 | #pragma GCC pop_options | |
2014 | #endif /* __DISABLE_AVX512VLDQ__ */ | |
2015 | ||
936c0fe4 | 2016 | #endif /* _AVX512VLDQINTRIN_H_INCLUDED */ |