]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/avx512fintrin.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / avx512fintrin.h
CommitLineData
a945c346 1/* Copyright (C) 2013-2024 Free Software Foundation, Inc.
756c5857
AI
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512FINTRIN_H_INCLUDED
29#define _AVX512FINTRIN_H_INCLUDED
30
fd514717 31#if !defined (__AVX512F__) || defined (__EVEX512__)
756c5857 32#pragma GCC push_options
fd514717 33#pragma GCC target("avx512f,no-evex512")
756c5857
AI
34#define __DISABLE_AVX512F__
35#endif /* __AVX512F__ */
36
756c5857
AI
/* Integer carrier types for write masks.  __mmask8 is the type of the __U
   mask parameter taken by the masked scalar intrinsics in this header.  */
37typedef unsigned char __mmask8;
38typedef unsigned short __mmask16;
79fb4764 39typedef unsigned int __mmask32;
756c5857 40
79fb4764
HJ
41/* Constants for mantissa extraction */
/* Normalisation-interval selectors for mantissa extraction.  The numeric
   values are spelled out; they equal the implicit 0..3 numbering.  */
typedef enum
{
  _MM_MANT_NORM_1_2 = 0,	/* interval [1, 2) */
  _MM_MANT_NORM_p5_2 = 1,	/* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1 = 2,	/* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5 = 3	/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
dcb2c527 49
/* Sign-handling selectors for mantissa extraction.  The numeric values are
   spelled out; they equal the implicit 0..2 numbering.  */
typedef enum
{
  _MM_MANT_SIGN_src = 0,	/* sign = sign(SRC) */
  _MM_MANT_SIGN_zero = 1,	/* sign = 0 */
  _MM_MANT_SIGN_nan = 2		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
dcb2c527 56
fd79b414
HJ
57/* These _mm{,256}_avx512* intrins are duplicated from their _mm{,256}_* forms
58 from AVX2 or before. We need to add them to prevent target option mismatch
59 when calling AVX512 intrins implemented with these intrins under no-evex512
60 function attribute. All AVX512 intrins calling those AVX2 intrins or
61 before will change their calls to these AVX512 versions. */
4bbabb2a
HJ
62extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
63_mm_avx512_undefined_ps (void)
64{
65#pragma GCC diagnostic push
66#pragma GCC diagnostic ignored "-Winit-self"
67 __m128 __Y = __Y;
68#pragma GCC diagnostic pop
69 return __Y;
70}
71
72extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
73_mm_avx512_undefined_pd (void)
74{
75#pragma GCC diagnostic push
76#pragma GCC diagnostic ignored "-Winit-self"
77 __m128d __Y = __Y;
78#pragma GCC diagnostic pop
79 return __Y;
80}
81
fd79b414
HJ
82extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83_mm_avx512_setzero_ps (void)
84{
85 return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
86}
87
88extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
89_mm_avx512_setzero_pd (void)
90{
91 return __extension__ (__m128d){ 0.0, 0.0 };
92}
93
79fb4764
HJ
94#ifdef __OPTIMIZE__
95extern __inline __m128d
756c5857 96__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 97_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
756c5857 98{
79fb4764
HJ
99 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
100 (__v2df) __B,
101 __R);
756c5857
AI
102}
103
79fb4764 104extern __inline __m128d
756c5857 105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
106_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
107 __m128d __B, const int __R)
4e6a811f 108{
79fb4764
HJ
109 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
110 (__v2df) __B,
111 (__v2df) __W,
112 (__mmask8) __U, __R);
4e6a811f
JJ
113}
114
79fb4764
HJ
115extern __inline __m128d
116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
118 const int __R)
4e6a811f 119{
79fb4764
HJ
120 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
121 (__v2df) __B,
122 (__v2df)
fd79b414 123 _mm_avx512_setzero_pd (),
79fb4764 124 (__mmask8) __U, __R);
4e6a811f
JJ
125}
126
79fb4764 127extern __inline __m128
756c5857 128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 129_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
756c5857 130{
79fb4764
HJ
131 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
132 (__v4sf) __B,
133 __R);
756c5857
AI
134}
135
79fb4764 136extern __inline __m128
756c5857 137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
138_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
139 __m128 __B, const int __R)
756c5857 140{
79fb4764
HJ
141 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
142 (__v4sf) __B,
143 (__v4sf) __W,
144 (__mmask8) __U, __R);
756c5857
AI
145}
146
79fb4764 147extern __inline __m128
0b192937 148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
149_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
150 const int __R)
0b192937 151{
79fb4764
HJ
152 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
153 (__v4sf) __B,
154 (__v4sf)
fd79b414 155 _mm_avx512_setzero_ps (),
79fb4764 156 (__mmask8) __U, __R);
0b192937
UD
157}
158
79fb4764 159extern __inline __m128d
0b192937 160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 161_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
0b192937 162{
79fb4764
HJ
163 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
164 (__v2df) __B,
165 __R);
0b192937
UD
166}
167
79fb4764 168extern __inline __m128d
0b192937 169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
170_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
171 __m128d __B, const int __R)
0b192937 172{
79fb4764
HJ
173 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
174 (__v2df) __B,
175 (__v2df) __W,
176 (__mmask8) __U, __R);
0b192937
UD
177}
178
79fb4764 179extern __inline __m128d
7d9088c2 180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
181_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
182 const int __R)
7d9088c2 183{
79fb4764
HJ
184 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
185 (__v2df) __B,
186 (__v2df)
fd79b414 187 _mm_avx512_setzero_pd (),
79fb4764 188 (__mmask8) __U, __R);
7d9088c2
UD
189}
190
79fb4764 191extern __inline __m128
7d9088c2 192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 193_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
7d9088c2 194{
79fb4764
HJ
195 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
196 (__v4sf) __B,
197 __R);
7d9088c2
UD
198}
199
79fb4764 200extern __inline __m128
2b2384e8 201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
202_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
203 __m128 __B, const int __R)
2b2384e8 204{
79fb4764
HJ
205 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
206 (__v4sf) __B,
207 (__v4sf) __W,
208 (__mmask8) __U, __R);
2b2384e8
UD
209}
210
79fb4764 211extern __inline __m128
2b2384e8 212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
213_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
214 const int __R)
2b2384e8 215{
79fb4764
HJ
216 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
217 (__v4sf) __B,
218 (__v4sf)
fd79b414 219 _mm_avx512_setzero_ps (),
79fb4764 220 (__mmask8) __U, __R);
2b2384e8
UD
221}
222
79fb4764
HJ
223#else
/* Macro forms of the scalar add/sub *_round_* intrinsics, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in an outer pair
   of parentheses and every argument is parenthesized, so the result behaves
   as a single primary expression (a trailing postfix operator applies to
   the casted value, not to the builtin call) and arguments containing
   low-precedence operators are passed intact.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round ((A), (B), (C)))

#define _mm_mask_add_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_add_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_addsd_mask_round ((A), (B), \
				    (__v2df) _mm_avx512_setzero_pd (), \
				    (U), (C)))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round ((A), (B), (C)))

#define _mm_mask_add_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_add_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_addss_mask_round ((A), (B), \
				    (__v4sf) _mm_avx512_setzero_ps (), \
				    (U), (C)))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round ((A), (B), (C)))

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_sub_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_subsd_mask_round ((A), (B), \
				    (__v2df) _mm_avx512_setzero_pd (), \
				    (U), (C)))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round ((A), (B), (C)))

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_sub_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_subss_mask_round ((A), (B), \
				    (__v4sf) _mm_avx512_setzero_ps (), \
				    (U), (C)))
79fb4764
HJ
259
260#endif
261
262extern __inline __m128d
756c5857 263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 264_mm_rcp14_sd (__m128d __A, __m128d __B)
756c5857 265{
79fb4764
HJ
266 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
267 (__v2df) __A);
756c5857
AI
268}
269
79fb4764 270extern __inline __m128d
4e6a811f 271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 272_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
4e6a811f 273{
79fb4764
HJ
274 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
275 (__v2df) __A,
276 (__v2df) __W,
277 (__mmask8) __U);
4e6a811f
JJ
278}
279
79fb4764 280extern __inline __m128d
756c5857 281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 282_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
756c5857 283{
79fb4764
HJ
284 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
285 (__v2df) __A,
fd79b414 286 (__v2df) _mm_avx512_setzero_ps (),
79fb4764 287 (__mmask8) __U);
756c5857
AI
288}
289
79fb4764 290extern __inline __m128
7d9088c2 291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 292_mm_rcp14_ss (__m128 __A, __m128 __B)
7d9088c2 293{
79fb4764
HJ
294 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
295 (__v4sf) __A);
7d9088c2
UD
296}
297
79fb4764 298extern __inline __m128
756c5857 299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 300_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 301{
79fb4764
HJ
302 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
303 (__v4sf) __A,
304 (__v4sf) __W,
305 (__mmask8) __U);
756c5857
AI
306}
307
79fb4764 308extern __inline __m128
756c5857 309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 310_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
756c5857 311{
79fb4764
HJ
312 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
313 (__v4sf) __A,
fd79b414 314 (__v4sf) _mm_avx512_setzero_ps (),
79fb4764 315 (__mmask8) __U);
756c5857
AI
316}
317
79fb4764 318extern __inline __m128d
756c5857 319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 320_mm_rsqrt14_sd (__m128d __A, __m128d __B)
756c5857 321{
79fb4764
HJ
322 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
323 (__v2df) __A);
756c5857
AI
324}
325
79fb4764 326extern __inline __m128d
756c5857 327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 328_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
756c5857 329{
79fb4764
HJ
330 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
331 (__v2df) __A,
332 (__v2df) __W,
333 (__mmask8) __U);
756c5857
AI
334}
335
79fb4764 336extern __inline __m128d
756c5857 337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 338_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
756c5857 339{
79fb4764
HJ
340 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
341 (__v2df) __A,
fd79b414 342 (__v2df) _mm_avx512_setzero_pd (),
79fb4764 343 (__mmask8) __U);
756c5857
AI
344}
345
79fb4764 346extern __inline __m128
756c5857 347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 348_mm_rsqrt14_ss (__m128 __A, __m128 __B)
756c5857 349{
79fb4764
HJ
350 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
351 (__v4sf) __A);
756c5857
AI
352}
353
79fb4764 354extern __inline __m128
756c5857 355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 356_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 357{
79fb4764
HJ
358 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
359 (__v4sf) __A,
360 (__v4sf) __W,
361 (__mmask8) __U);
756c5857
AI
362}
363
79fb4764 364extern __inline __m128
756c5857 365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 366_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
756c5857 367{
79fb4764
HJ
368 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
369 (__v4sf) __A,
fd79b414 370 (__v4sf) _mm_avx512_setzero_ps (),
79fb4764 371 (__mmask8) __U);
756c5857
AI
372}
373
79fb4764
HJ
374#ifdef __OPTIMIZE__
375extern __inline __m128d
756c5857 376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 377_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
756c5857 378{
79fb4764
HJ
379 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
380 (__v2df) __A,
381 (__v2df)
fd79b414 382 _mm_avx512_setzero_pd (),
79fb4764 383 (__mmask8) -1, __R);
756c5857
AI
384}
385
79fb4764 386extern __inline __m128d
756c5857 387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
388_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
389 const int __R)
756c5857 390{
79fb4764
HJ
391 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
392 (__v2df) __A,
393 (__v2df) __W,
394 (__mmask8) __U, __R);
756c5857
AI
395}
396
79fb4764 397extern __inline __m128d
756c5857 398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 399_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
756c5857 400{
79fb4764
HJ
401 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
402 (__v2df) __A,
403 (__v2df)
fd79b414 404 _mm_avx512_setzero_pd (),
79fb4764 405 (__mmask8) __U, __R);
756c5857
AI
406}
407
79fb4764 408extern __inline __m128
756c5857 409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 410_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
756c5857 411{
79fb4764
HJ
412 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
413 (__v4sf) __A,
414 (__v4sf)
fd79b414 415 _mm_avx512_setzero_ps (),
79fb4764 416 (__mmask8) -1, __R);
756c5857
AI
417}
418
79fb4764 419extern __inline __m128
756c5857 420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
421_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
422 const int __R)
756c5857 423{
79fb4764
HJ
424 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
425 (__v4sf) __A,
426 (__v4sf) __W,
427 (__mmask8) __U, __R);
756c5857
AI
428}
429
79fb4764 430extern __inline __m128
756c5857 431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 432_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
756c5857 433{
79fb4764
HJ
434 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
435 (__v4sf) __A,
436 (__v4sf)
fd79b414 437 _mm_avx512_setzero_ps (),
79fb4764 438 (__mmask8) __U, __R);
756c5857
AI
439}
440
79fb4764 441extern __inline __m128d
756c5857 442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 443_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
756c5857 444{
79fb4764
HJ
445 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
446 (__v2df) __B,
447 __R);
756c5857
AI
448}
449
79fb4764 450extern __inline __m128d
756c5857 451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
452_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
453 __m128d __B, const int __R)
756c5857 454{
79fb4764
HJ
455 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
456 (__v2df) __B,
457 (__v2df) __W,
458 (__mmask8) __U, __R);
756c5857
AI
459}
460
79fb4764 461extern __inline __m128d
756c5857 462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
463_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
464 const int __R)
756c5857 465{
79fb4764
HJ
466 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
467 (__v2df) __B,
468 (__v2df)
fd79b414 469 _mm_avx512_setzero_pd (),
79fb4764 470 (__mmask8) __U, __R);
756c5857
AI
471}
472
79fb4764 473extern __inline __m128
756c5857 474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 475_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
756c5857 476{
79fb4764
HJ
477 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
478 (__v4sf) __B,
479 __R);
756c5857
AI
480}
481
79fb4764 482extern __inline __m128
756c5857 483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
484_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
485 __m128 __B, const int __R)
756c5857 486{
79fb4764
HJ
487 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
488 (__v4sf) __B,
489 (__v4sf) __W,
490 (__mmask8) __U, __R);
756c5857
AI
491}
492
79fb4764 493extern __inline __m128
756c5857 494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
495_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
496 const int __R)
756c5857 497{
79fb4764
HJ
498 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
499 (__v4sf) __B,
500 (__v4sf)
fd79b414 501 _mm_avx512_setzero_ps (),
79fb4764 502 (__mmask8) __U, __R);
756c5857
AI
503}
504
79fb4764 505extern __inline __m128d
756c5857 506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 507_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
756c5857 508{
79fb4764
HJ
509 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
510 (__v2df) __B,
511 __R);
756c5857
AI
512}
513
79fb4764 514extern __inline __m128d
756c5857 515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
516_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
517 __m128d __B, const int __R)
756c5857 518{
79fb4764
HJ
519 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
520 (__v2df) __B,
521 (__v2df) __W,
522 (__mmask8) __U, __R);
756c5857
AI
523}
524
79fb4764 525extern __inline __m128d
756c5857 526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
527_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
528 const int __R)
756c5857 529{
79fb4764
HJ
530 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
531 (__v2df) __B,
532 (__v2df)
fd79b414 533 _mm_avx512_setzero_pd (),
79fb4764 534 (__mmask8) __U, __R);
756c5857
AI
535}
536
79fb4764 537extern __inline __m128
756c5857 538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 539_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
756c5857 540{
79fb4764
HJ
541 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
542 (__v4sf) __B,
543 __R);
756c5857
AI
544}
545
79fb4764 546extern __inline __m128
756c5857 547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
548_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
549 __m128 __B, const int __R)
756c5857 550{
79fb4764
HJ
551 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
552 (__v4sf) __B,
553 (__v4sf) __W,
554 (__mmask8) __U, __R);
756c5857
AI
555}
556
79fb4764 557extern __inline __m128
756c5857 558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
559_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
560 const int __R)
756c5857 561{
79fb4764
HJ
562 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
563 (__v4sf) __B,
564 (__v4sf)
fd79b414 565 _mm_avx512_setzero_ps (),
79fb4764 566 (__mmask8) __U, __R);
756c5857
AI
567}
568
79fb4764 569extern __inline __m128d
756c5857 570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 571_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
756c5857 572{
79fb4764
HJ
573 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
574 (__v2df) __B,
575 (__v2df)
fd79b414 576 _mm_avx512_setzero_pd (),
79fb4764 577 (__mmask8) -1, __R);
756c5857
AI
578}
579
79fb4764 580extern __inline __m128d
756c5857 581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
582_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
583 const int __R)
756c5857 584{
79fb4764
HJ
585 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
586 (__v2df) __B,
587 (__v2df) __W,
588 (__mmask8) __U, __R);
756c5857
AI
589}
590
79fb4764 591extern __inline __m128d
756c5857 592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
593_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
594 const int __R)
756c5857 595{
79fb4764
HJ
596 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
597 (__v2df) __B,
598 (__v2df)
fd79b414 599 _mm_avx512_setzero_pd (),
79fb4764 600 (__mmask8) __U, __R);
756c5857
AI
601}
602
79fb4764 603extern __inline __m128
756c5857 604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 605_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
756c5857 606{
79fb4764
HJ
607 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
608 (__v4sf) __B,
609 (__v4sf)
fd79b414 610 _mm_avx512_setzero_ps (),
79fb4764 611 (__mmask8) -1, __R);
756c5857
AI
612}
613
79fb4764 614extern __inline __m128
756c5857 615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
616_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
617 const int __R)
756c5857 618{
79fb4764
HJ
619 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
620 (__v4sf) __B,
621 (__v4sf) __W,
622 (__mmask8) __U, __R);
756c5857
AI
623}
624
79fb4764 625extern __inline __m128
756c5857 626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 627_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
756c5857 628{
79fb4764
HJ
629 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
630 (__v4sf) __B,
631 (__v4sf)
fd79b414 632 _mm_avx512_setzero_ps (),
79fb4764 633 (__mmask8) __U, __R);
756c5857 634}
79fb4764
HJ
635#else
/* Macro forms of the scalar sqrt/mul/div/scalef *_round_* intrinsics, used
   when __OPTIMIZE__ is not defined.  Every expansion is wrapped in an outer
   pair of parentheses and every argument is parenthesized, so the result
   behaves as a single primary expression and arguments containing
   low-precedence operators are passed intact.  The sqrt builtins take B
   before A; the others take A before B.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) \
   __builtin_ia32_sqrtsd_mask_round ((B), (A), \
				     (__v2df) _mm_avx512_setzero_pd (), \
				     -1, (C)))

#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((B), (A), (W), (U), (C)))

#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_sqrtsd_mask_round ((B), (A), \
				     (__v2df) _mm_avx512_setzero_pd (), \
				     (U), (C)))

#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) \
   __builtin_ia32_sqrtss_mask_round ((B), (A), \
				     (__v4sf) _mm_avx512_setzero_ps (), \
				     -1, (C)))

#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((B), (A), (W), (U), (C)))

#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_sqrtss_mask_round ((B), (A), \
				     (__v4sf) _mm_avx512_setzero_ps (), \
				     (U), (C)))

#define _mm_mul_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_round ((A), (B), (C)))

#define _mm_mask_mul_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_mulsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_mul_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_mulsd_mask_round ((A), (B), \
				    (__v2df) _mm_avx512_setzero_pd (), \
				    (U), (C)))

#define _mm_mul_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_mulss_round ((A), (B), (C)))

#define _mm_mask_mul_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_mulss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_mul_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_mulss_mask_round ((A), (B), \
				    (__v4sf) _mm_avx512_setzero_ps (), \
				    (U), (C)))

#define _mm_div_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_divsd_round ((A), (B), (C)))

#define _mm_mask_div_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_divsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_div_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_divsd_mask_round ((A), (B), \
				    (__v2df) _mm_avx512_setzero_pd (), \
				    (U), (C)))

#define _mm_div_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_divss_round ((A), (B), (C)))

#define _mm_mask_div_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_divss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_div_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_divss_mask_round ((A), (B), \
				    (__v4sf) _mm_avx512_setzero_ps (), \
				    (U), (C)))

/* The unmasked scalef macros pass an undefined vector as the third operand
   together with a mask of -1.  */
#define _mm_scalef_round_sd(A, B, C) \
  ((__m128d) \
   __builtin_ia32_scalefsd_mask_round ((A), (B), \
				       (__v2df) _mm_avx512_undefined_pd (), \
				       -1, (C)))

#define _mm_scalef_round_ss(A, B, C) \
  ((__m128) \
   __builtin_ia32_scalefss_mask_round ((A), (B), \
				       (__v4sf) _mm_avx512_undefined_ps (), \
				       -1, (C)))

#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
  ((__m128) \
   __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_scalef_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_scalefsd_mask_round ((A), (B), \
				       (__v2df) _mm_avx512_setzero_pd (), \
				       (U), (C)))

#define _mm_maskz_scalef_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_scalefss_mask_round ((A), (B), \
				       (__v4sf) _mm_avx512_setzero_ps (), \
				       (U), (C)))
725#endif
726
/* Non-"round" masked sqrt/scalef forms.  Each one forwards its arguments
   unchanged to the matching *_round_* intrinsic and supplies
   _MM_FROUND_CUR_DIRECTION as the final operand.  */
727#define _mm_mask_sqrt_sd(W, U, A, B) \
728 _mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
729
730#define _mm_maskz_sqrt_sd(U, A, B) \
731 _mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
732
733#define _mm_mask_sqrt_ss(W, U, A, B) \
734 _mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
735
736#define _mm_maskz_sqrt_ss(U, A, B) \
737 _mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
738
739#define _mm_mask_scalef_sd(W, U, A, B) \
740 _mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
741
742#define _mm_maskz_scalef_sd(U, A, B) \
743 _mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
744
745#define _mm_mask_scalef_ss(W, U, A, B) \
746 _mm_mask_scalef_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
747
748#define _mm_maskz_scalef_ss(U, A, B) \
749 _mm_maskz_scalef_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
750
751extern __inline __m128d
752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
753_mm_cvtu32_sd (__m128d __A, unsigned __B)
756c5857 754{
79fb4764 755 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
756c5857
AI
756}
757
79fb4764
HJ
758#ifdef __x86_64__
759#ifdef __OPTIMIZE__
760extern __inline __m128d
756c5857 761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 762_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
756c5857 763{
79fb4764 764 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
756c5857
AI
765}
766
79fb4764 767extern __inline __m128d
756c5857 768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 769_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
756c5857 770{
79fb4764 771 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
756c5857
AI
772}
773
79fb4764 774extern __inline __m128d
756c5857 775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 776_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
756c5857 777{
79fb4764 778 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
756c5857 779}
79fb4764
HJ
780#else
/* Macro forms of the 64-bit integer to scalar-double conversions, used
   when __OPTIMIZE__ is not defined.  Expansions and arguments are fully
   parenthesized so each macro behaves as a single primary expression.  */
#define _mm_cvt_roundu64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtusi2sd64 ((A), (B), (C)))

#define _mm_cvt_roundi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))

#define _mm_cvt_roundsi64_sd(A, B, C) \
  ((__m128d) __builtin_ia32_cvtsi2sd64 ((A), (B), (C)))
789#endif
790
791#endif
792
793#ifdef __OPTIMIZE__
794extern __inline __m128
756c5857 795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 796_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
756c5857 797{
79fb4764 798 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
756c5857
AI
799}
800
79fb4764 801extern __inline __m128
756c5857 802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 803_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
756c5857 804{
79fb4764 805 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
756c5857
AI
806}
807
79fb4764 808extern __inline __m128
756c5857 809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 810_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
756c5857 811{
79fb4764 812 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
756c5857 813}
79fb4764
HJ
814#else
/* Macro forms of the 32-bit integer to scalar-float conversions, used when
   __OPTIMIZE__ is not defined.  Expansions and arguments are fully
   parenthesized so each macro behaves as a single primary expression.  */
#define _mm_cvt_roundu32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtusi2ss32 ((A), (B), (C)))

#define _mm_cvt_roundi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))

#define _mm_cvt_roundsi32_ss(A, B, C) \
  ((__m128) __builtin_ia32_cvtsi2ss32 ((A), (B), (C)))
823#endif
824
825#ifdef __x86_64__
826#ifdef __OPTIMIZE__
827extern __inline __m128
756c5857 828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 829_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
756c5857 830{
79fb4764 831 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
756c5857
AI
832}
833
79fb4764 834extern __inline __m128
756c5857 835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 836_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
756c5857 837{
79fb4764 838 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
756c5857
AI
839}
840
79fb4764 841extern __inline __m128
756c5857 842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 843_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
756c5857 844{
79fb4764 845 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
756c5857 846}
79fb4764
HJ
847#else
/* Macro forms used when __OPTIMIZE__ is not defined.  Each expansion
   is wrapped in its own parentheses so that the __m128 cast binds to
   the builtin call only, whatever operator follows the invocation;
   the arguments are parenthesized for the same reason.  */
#define _mm_cvt_roundu64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtusi2ss64((A), (B), (C)))

#define _mm_cvt_roundi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64((A), (B), (C)))

#define _mm_cvt_roundsi64_ss(A, B, C)   \
    ((__m128)__builtin_ia32_cvtsi2ss64((A), (B), (C)))
856#endif
857
858#endif
859
860extern __inline __m128
756c5857 861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 862_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
756c5857 863{
79fb4764 864 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
756c5857
AI
865}
866
79fb4764 867extern __inline __m128
756c5857 868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 869_mm_maskz_load_ss (__mmask8 __U, const float *__P)
756c5857 870{
fd79b414 871 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_avx512_setzero_ps (),
79fb4764 872 __U);
756c5857
AI
873}
874
79fb4764 875extern __inline __m128d
756c5857 876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 877_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
756c5857 878{
79fb4764 879 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
756c5857
AI
880}
881
79fb4764 882extern __inline __m128d
756c5857 883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 884_mm_maskz_load_sd (__mmask8 __U, const double *__P)
756c5857 885{
fd79b414 886 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_avx512_setzero_pd (),
79fb4764 887 __U);
756c5857
AI
888}
889
79fb4764 890extern __inline __m128
756c5857 891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 892_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 893{
79fb4764
HJ
894 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
895 (__v4sf) __W, __U);
756c5857
AI
896}
897
79fb4764 898extern __inline __m128
756c5857 899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 900_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
756c5857 901{
79fb4764 902 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
fd79b414 903 (__v4sf) _mm_avx512_setzero_ps (), __U);
756c5857
AI
904}
905
79fb4764 906extern __inline __m128d
756c5857 907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 908_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
756c5857 909{
79fb4764
HJ
910 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
911 (__v2df) __W, __U);
756c5857
AI
912}
913
79fb4764 914extern __inline __m128d
756c5857 915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 916_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
756c5857 917{
79fb4764 918 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
fd79b414 919 (__v2df) _mm_avx512_setzero_pd (),
79fb4764 920 __U);
756c5857
AI
921}
922
79fb4764 923extern __inline void
756c5857 924__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 925_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
756c5857 926{
79fb4764 927 __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
756c5857
AI
928}
929
79fb4764 930extern __inline void
756c5857 931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 932_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
756c5857 933{
79fb4764 934 __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
756c5857
AI
935}
936
79fb4764
HJ
937#ifdef __OPTIMIZE__
938extern __inline __m128d
756c5857 939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
940_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
941 const int __imm, const int __R)
756c5857 942{
79fb4764
HJ
943 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
944 (__v2df) __B,
945 (__v2di) __C, __imm,
946 (__mmask8) -1, __R);
756c5857
AI
947}
948
79fb4764 949extern __inline __m128d
756c5857 950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
951_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
952 __m128i __C, const int __imm, const int __R)
756c5857 953{
79fb4764
HJ
954 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
955 (__v2df) __B,
956 (__v2di) __C, __imm,
957 (__mmask8) __U, __R);
756c5857
AI
958}
959
79fb4764 960extern __inline __m128d
756c5857 961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
962_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
963 __m128i __C, const int __imm, const int __R)
756c5857 964{
79fb4764
HJ
965 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
966 (__v2df) __B,
967 (__v2di) __C,
968 __imm,
969 (__mmask8) __U, __R);
756c5857
AI
970}
971
79fb4764 972extern __inline __m128
756c5857 973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
974_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
975 const int __imm, const int __R)
756c5857 976{
79fb4764
HJ
977 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
978 (__v4sf) __B,
979 (__v4si) __C, __imm,
980 (__mmask8) -1, __R);
756c5857
AI
981}
982
79fb4764 983extern __inline __m128
756c5857 984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
985_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
986 __m128i __C, const int __imm, const int __R)
756c5857 987{
79fb4764
HJ
988 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
989 (__v4sf) __B,
990 (__v4si) __C, __imm,
991 (__mmask8) __U, __R);
756c5857
AI
992}
993
79fb4764 994extern __inline __m128
756c5857 995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
996_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
997 __m128i __C, const int __imm, const int __R)
756c5857 998{
79fb4764
HJ
999 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
1000 (__v4sf) __B,
1001 (__v4si) __C, __imm,
1002 (__mmask8) __U, __R);
756c5857
AI
1003}
1004
79fb4764
HJ
1005#else
/* Macro forms of the scalar fixupimm intrinsics, used when
   __OPTIMIZE__ is not defined.  Each casts its vector arguments to
   the builtin's operand types and passes C and R straight through;
   the unmasked forms supply (__mmask8)(-1) as the mask.  */
#define _mm_fixupimm_round_sd(X, Y, Z, C, R)					\
    ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
      (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R)				\
    ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
      (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R)				\
    ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X),	\
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
      (__mmask8)(U), (R)))

#define _mm_fixupimm_round_ss(X, Y, Z, C, R)					\
    ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
      (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R)				\
    ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
      (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R)				\
    ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X),	\
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
      (__mmask8)(U), (R)))
1035
1036#endif
1037
1038#ifdef __x86_64__
1039#ifdef __OPTIMIZE__
1040extern __inline unsigned long long
756c5857 1041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1042_mm_cvt_roundss_u64 (__m128 __A, const int __R)
756c5857 1043{
79fb4764 1044 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
756c5857
AI
1045}
1046
79fb4764 1047extern __inline long long
756c5857 1048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1049_mm_cvt_roundss_si64 (__m128 __A, const int __R)
756c5857 1050{
79fb4764 1051 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
756c5857
AI
1052}
1053
79fb4764 1054extern __inline long long
756c5857 1055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1056_mm_cvt_roundss_i64 (__m128 __A, const int __R)
756c5857 1057{
79fb4764 1058 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
756c5857
AI
1059}
1060
79fb4764 1061extern __inline unsigned long long
756c5857 1062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1063_mm_cvtt_roundss_u64 (__m128 __A, const int __R)
756c5857 1064{
79fb4764 1065 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
756c5857
AI
1066}
1067
79fb4764 1068extern __inline long long
756c5857 1069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1070_mm_cvtt_roundss_i64 (__m128 __A, const int __R)
756c5857 1071{
79fb4764 1072 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
756c5857
AI
1073}
1074
79fb4764 1075extern __inline long long
756c5857 1076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1077_mm_cvtt_roundss_si64 (__m128 __A, const int __R)
756c5857 1078{
79fb4764 1079 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
756c5857 1080}
79fb4764
HJ
1081#else
/* Macro forms of the float -> 64-bit integer conversions, used when
   __OPTIMIZE__ is not defined.  A is the source vector and B the
   rounding argument; both are parenthesized before being handed to
   the builtin.  */
#define _mm_cvt_roundss_u64(A, B)   \
    ((unsigned long long)__builtin_ia32_vcvtss2usi64((A), (B)))

#define _mm_cvt_roundss_si64(A, B)   \
    ((long long)__builtin_ia32_vcvtss2si64((A), (B)))

#define _mm_cvt_roundss_i64(A, B)   \
    ((long long)__builtin_ia32_vcvtss2si64((A), (B)))

#define _mm_cvtt_roundss_u64(A, B)  \
    ((unsigned long long)__builtin_ia32_vcvttss2usi64((A), (B)))

#define _mm_cvtt_roundss_i64(A, B)  \
    ((long long)__builtin_ia32_vcvttss2si64((A), (B)))

#define _mm_cvtt_roundss_si64(A, B)  \
    ((long long)__builtin_ia32_vcvttss2si64((A), (B)))
1099#endif
1100#endif
1101
1102#ifdef __OPTIMIZE__
1103extern __inline unsigned
1104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1105_mm_cvt_roundss_u32 (__m128 __A, const int __R)
1106{
1107 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
756c5857
AI
1108}
1109
79fb4764 1110extern __inline int
756c5857 1111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1112_mm_cvt_roundss_si32 (__m128 __A, const int __R)
756c5857 1113{
79fb4764 1114 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
756c5857
AI
1115}
1116
79fb4764 1117extern __inline int
756c5857 1118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1119_mm_cvt_roundss_i32 (__m128 __A, const int __R)
756c5857 1120{
79fb4764 1121 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
756c5857
AI
1122}
1123
79fb4764 1124extern __inline unsigned
756c5857 1125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1126_mm_cvtt_roundss_u32 (__m128 __A, const int __R)
756c5857 1127{
79fb4764 1128 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
756c5857
AI
1129}
1130
79fb4764 1131extern __inline int
756c5857 1132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1133_mm_cvtt_roundss_i32 (__m128 __A, const int __R)
756c5857 1134{
79fb4764 1135 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
756c5857
AI
1136}
1137
79fb4764 1138extern __inline int
756c5857 1139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1140_mm_cvtt_roundss_si32 (__m128 __A, const int __R)
756c5857 1141{
79fb4764 1142 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
756c5857
AI
1143}
1144#else
79fb4764
HJ
/* Macro forms of the float -> 32-bit integer conversions, used when
   __OPTIMIZE__ is not defined.  A is the source vector and B the
   rounding argument; both are parenthesized before being handed to
   the builtin.  */
#define _mm_cvt_roundss_u32(A, B)   \
    ((unsigned)__builtin_ia32_vcvtss2usi32((A), (B)))

#define _mm_cvt_roundss_si32(A, B)   \
    ((int)__builtin_ia32_vcvtss2si32((A), (B)))

#define _mm_cvt_roundss_i32(A, B)   \
    ((int)__builtin_ia32_vcvtss2si32((A), (B)))

#define _mm_cvtt_roundss_u32(A, B)  \
    ((unsigned)__builtin_ia32_vcvttss2usi32((A), (B)))

#define _mm_cvtt_roundss_si32(A, B)  \
    ((int)__builtin_ia32_vcvttss2si32((A), (B)))

#define _mm_cvtt_roundss_i32(A, B)  \
    ((int)__builtin_ia32_vcvttss2si32((A), (B)))
756c5857
AI
1162#endif
1163
79fb4764
HJ
1164#ifdef __x86_64__
1165#ifdef __OPTIMIZE__
1166extern __inline unsigned long long
756c5857 1167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1168_mm_cvt_roundsd_u64 (__m128d __A, const int __R)
756c5857 1169{
79fb4764 1170 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
756c5857
AI
1171}
1172
79fb4764 1173extern __inline long long
756c5857 1174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1175_mm_cvt_roundsd_si64 (__m128d __A, const int __R)
756c5857 1176{
79fb4764 1177 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
756c5857
AI
1178}
1179
79fb4764 1180extern __inline long long
756c5857 1181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1182_mm_cvt_roundsd_i64 (__m128d __A, const int __R)
756c5857 1183{
79fb4764 1184 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
756c5857
AI
1185}
1186
79fb4764 1187extern __inline unsigned long long
756c5857 1188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1189_mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
756c5857 1190{
79fb4764 1191 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
756c5857
AI
1192}
1193
79fb4764 1194extern __inline long long
756c5857 1195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1196_mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
756c5857 1197{
79fb4764 1198 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
756c5857
AI
1199}
1200
79fb4764 1201extern __inline long long
756c5857 1202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1203_mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
756c5857 1204{
79fb4764 1205 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
756c5857
AI
1206}
1207#else
79fb4764
HJ
/* Macro forms of the double -> 64-bit integer conversions, used when
   __OPTIMIZE__ is not defined.  A is the source vector and B the
   rounding argument; both are parenthesized before being handed to
   the builtin.  */
#define _mm_cvt_roundsd_u64(A, B)   \
    ((unsigned long long)__builtin_ia32_vcvtsd2usi64((A), (B)))

#define _mm_cvt_roundsd_si64(A, B)   \
    ((long long)__builtin_ia32_vcvtsd2si64((A), (B)))

#define _mm_cvt_roundsd_i64(A, B)   \
    ((long long)__builtin_ia32_vcvtsd2si64((A), (B)))

#define _mm_cvtt_roundsd_u64(A, B)   \
    ((unsigned long long)__builtin_ia32_vcvttsd2usi64((A), (B)))

#define _mm_cvtt_roundsd_si64(A, B)   \
    ((long long)__builtin_ia32_vcvttsd2si64((A), (B)))

#define _mm_cvtt_roundsd_i64(A, B)   \
    ((long long)__builtin_ia32_vcvttsd2si64((A), (B)))
1225#endif
756c5857
AI
1226#endif
1227
79fb4764
HJ
1228#ifdef __OPTIMIZE__
1229extern __inline unsigned
756c5857 1230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1231_mm_cvt_roundsd_u32 (__m128d __A, const int __R)
756c5857 1232{
79fb4764 1233 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
756c5857
AI
1234}
1235
79fb4764 1236extern __inline int
756c5857 1237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1238_mm_cvt_roundsd_si32 (__m128d __A, const int __R)
756c5857 1239{
79fb4764 1240 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
756c5857
AI
1241}
1242
79fb4764 1243extern __inline int
756c5857 1244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1245_mm_cvt_roundsd_i32 (__m128d __A, const int __R)
756c5857 1246{
79fb4764 1247 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
756c5857
AI
1248}
1249
79fb4764 1250extern __inline unsigned
756c5857 1251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1252_mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
756c5857 1253{
79fb4764 1254 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
756c5857
AI
1255}
1256
79fb4764 1257extern __inline int
756c5857 1258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1259_mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
756c5857 1260{
79fb4764 1261 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
756c5857
AI
1262}
1263
79fb4764 1264extern __inline int
756c5857 1265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1266_mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
756c5857 1267{
79fb4764 1268 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
756c5857 1269}
756c5857 1270
79fb4764 1271extern __inline __m128
756c5857 1272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1273_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
756c5857 1274{
79fb4764
HJ
1275 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
1276 (__v2df) __B,
1277 __R);
756c5857
AI
1278}
1279
79fb4764 1280extern __inline __m128
756c5857 1281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1282_mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A,
1283 __m128d __B, const int __R)
756c5857 1284{
79fb4764
HJ
1285 return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
1286 (__v2df) __B,
1287 (__v4sf) __W,
1288 __U,
1289 __R);
756c5857
AI
1290}
1291
79fb4764 1292extern __inline __m128
756c5857 1293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1294_mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A,
1295 __m128d __B, const int __R)
756c5857 1296{
79fb4764
HJ
1297 return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
1298 (__v2df) __B,
fd79b414 1299 _mm_avx512_setzero_ps (),
79fb4764
HJ
1300 __U,
1301 __R);
756c5857
AI
1302}
1303
79fb4764 1304extern __inline __m128d
756c5857 1305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1306_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
756c5857 1307{
79fb4764
HJ
1308 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
1309 (__v4sf) __B,
1310 __R);
756c5857
AI
1311}
1312
79fb4764 1313extern __inline __m128d
756c5857 1314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1315_mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A,
1316 __m128 __B, const int __R)
756c5857 1317{
79fb4764
HJ
1318 return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
1319 (__v4sf) __B,
1320 (__v2df) __W,
1321 __U,
1322 __R);
756c5857
AI
1323}
1324
79fb4764 1325extern __inline __m128d
756c5857 1326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1327_mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
1328 __m128 __B, const int __R)
756c5857 1329{
79fb4764
HJ
1330 return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
1331 (__v4sf) __B,
fd79b414 1332 _mm_avx512_setzero_pd (),
79fb4764
HJ
1333 __U,
1334 __R);
756c5857 1335}
756c5857 1336
79fb4764 1337extern __inline __m128
756c5857 1338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1339_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
756c5857 1340{
79fb4764
HJ
1341 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
1342 (__v4sf) __B,
1343 __R);
756c5857
AI
1344}
1345
79fb4764 1346extern __inline __m128
756c5857 1347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1348_mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1349 __m128 __B, const int __R)
756c5857 1350{
79fb4764
HJ
1351 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
1352 (__v4sf) __B,
1353 (__v4sf) __W,
1354 (__mmask8) __U, __R);
756c5857
AI
1355}
1356
79fb4764 1357extern __inline __m128
756c5857 1358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1359_mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1360 const int __R)
756c5857 1361{
79fb4764
HJ
1362 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
1363 (__v4sf) __B,
1364 (__v4sf)
fd79b414 1365 _mm_avx512_setzero_ps (),
79fb4764 1366 (__mmask8) __U, __R);
756c5857
AI
1367}
1368
79fb4764 1369extern __inline __m128d
756c5857 1370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1371_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
756c5857 1372{
79fb4764
HJ
1373 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
1374 (__v2df) __B,
1375 __R);
756c5857
AI
1376}
1377
79fb4764 1378extern __inline __m128d
756c5857 1379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1380_mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1381 __m128d __B, const int __R)
756c5857 1382{
79fb4764
HJ
1383 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
1384 (__v2df) __B,
1385 (__v2df) __W,
1386 (__mmask8) __U, __R);
756c5857
AI
1387}
1388
79fb4764 1389extern __inline __m128d
756c5857 1390__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1391_mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1392 const int __R)
756c5857 1393{
79fb4764
HJ
1394 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
1395 (__v2df) __B,
1396 (__v2df)
fd79b414 1397 _mm_avx512_setzero_pd (),
79fb4764 1398 (__mmask8) __U, __R);
756c5857 1399}
756c5857 1400
79fb4764 1401extern __inline __m128d
756c5857 1402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1403_mm_getmant_round_sd (__m128d __A, __m128d __B,
1404 _MM_MANTISSA_NORM_ENUM __C,
1405 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
756c5857 1406{
79fb4764
HJ
1407 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
1408 (__v2df) __B,
1409 (__D << 2) | __C,
1410 __R);
756c5857
AI
1411}
1412
79fb4764 1413extern __inline __m128d
756c5857 1414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1415_mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1416 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
1417 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
756c5857 1418{
79fb4764
HJ
1419 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
1420 (__v2df) __B,
1421 (__D << 2) | __C,
1422 (__v2df) __W,
1423 __U, __R);
756c5857
AI
1424}
1425
79fb4764 1426extern __inline __m128d
756c5857 1427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1428_mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1429 _MM_MANTISSA_NORM_ENUM __C,
1430 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
756c5857 1431{
79fb4764
HJ
1432 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
1433 (__v2df) __B,
1434 (__D << 2) | __C,
1435 (__v2df)
fd79b414 1436 _mm_avx512_setzero_pd(),
79fb4764 1437 __U, __R);
756c5857
AI
1438}
1439
79fb4764 1440extern __inline __m128
075691af 1441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1442_mm_getmant_round_ss (__m128 __A, __m128 __B,
1443 _MM_MANTISSA_NORM_ENUM __C,
1444 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
075691af 1445{
79fb4764
HJ
1446 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
1447 (__v4sf) __B,
1448 (__D << 2) | __C,
1449 __R);
075691af
AI
1450}
1451
79fb4764 1452extern __inline __m128
1853f5c7 1453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1454_mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1455 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
1456 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1853f5c7 1457{
79fb4764
HJ
1458 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
1459 (__v4sf) __B,
1460 (__D << 2) | __C,
1461 (__v4sf) __W,
1462 __U, __R);
1853f5c7
SP
1463}
1464
79fb4764 1465extern __inline __m128
1853f5c7 1466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1467_mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1468 _MM_MANTISSA_NORM_ENUM __C,
1469 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1853f5c7 1470{
79fb4764
HJ
1471 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
1472 (__v4sf) __B,
1473 (__D << 2) | __C,
1474 (__v4sf)
fd79b414 1475 _mm_avx512_setzero_ps(),
79fb4764 1476 __U, __R);
1853f5c7
SP
1477}
1478
075691af
AI
1479extern __inline __m128
1480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1481_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
1482 const int __R)
075691af 1483{
79fb4764
HJ
1484 return (__m128)
1485 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
1486 (__v4sf) __B, __imm,
1487 (__v4sf)
fd79b414 1488 _mm_avx512_setzero_ps (),
79fb4764
HJ
1489 (__mmask8) -1,
1490 __R);
075691af
AI
1491}
1492
1853f5c7
SP
1493extern __inline __m128
1494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1495_mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
1496 __m128 __D, const int __imm, const int __R)
1853f5c7 1497{
79fb4764
HJ
1498 return (__m128)
1499 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
1500 (__v4sf) __D, __imm,
1501 (__v4sf) __A,
1502 (__mmask8) __B,
1503 __R);
1853f5c7
SP
1504}
1505
1506extern __inline __m128
1507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1508_mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
1509 const int __imm, const int __R)
1853f5c7 1510{
79fb4764
HJ
1511 return (__m128)
1512 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
1513 (__v4sf) __C, __imm,
1514 (__v4sf)
fd79b414 1515 _mm_avx512_setzero_ps (),
79fb4764
HJ
1516 (__mmask8) __A,
1517 __R);
1853f5c7
SP
1518}
1519
075691af
AI
1520extern __inline __m128d
1521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1522_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
1523 const int __R)
075691af 1524{
79fb4764
HJ
1525 return (__m128d)
1526 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
1527 (__v2df) __B, __imm,
1528 (__v2df)
fd79b414 1529 _mm_avx512_setzero_pd (),
79fb4764
HJ
1530 (__mmask8) -1,
1531 __R);
075691af
AI
1532}
1533
1853f5c7
SP
1534extern __inline __m128d
1535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1536_mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
1537 __m128d __D, const int __imm, const int __R)
1853f5c7 1538{
79fb4764
HJ
1539 return (__m128d)
1540 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
1541 (__v2df) __D, __imm,
1542 (__v2df) __A,
1543 (__mmask8) __B,
1544 __R);
1853f5c7
SP
1545}
1546
1547extern __inline __m128d
1548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1549_mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
1550 const int __imm, const int __R)
1853f5c7 1551{
79fb4764
HJ
1552 return (__m128d)
1553 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
1554 (__v2df) __C, __imm,
1555 (__v2df)
fd79b414 1556 _mm_avx512_setzero_pd (),
79fb4764
HJ
1557 (__mmask8) __A,
1558 __R);
1853f5c7
SP
1559}
1560
79fb4764
HJ
1561#else
/* Macro forms of the double -> 32-bit integer conversions, used when
   __OPTIMIZE__ is not defined.  A is the source vector and B the
   rounding argument; both are parenthesized before being handed to
   the builtin.  */
#define _mm_cvt_roundsd_u32(A, B)   \
    ((unsigned)__builtin_ia32_vcvtsd2usi32((A), (B)))

#define _mm_cvt_roundsd_si32(A, B)   \
    ((int)__builtin_ia32_vcvtsd2si32((A), (B)))

#define _mm_cvt_roundsd_i32(A, B)   \
    ((int)__builtin_ia32_vcvtsd2si32((A), (B)))

#define _mm_cvtt_roundsd_u32(A, B)   \
    ((unsigned)__builtin_ia32_vcvttsd2usi32((A), (B)))

#define _mm_cvtt_roundsd_si32(A, B)   \
    ((int)__builtin_ia32_vcvttsd2si32((A), (B)))

#define _mm_cvtt_roundsd_i32(A, B)   \
    ((int)__builtin_ia32_vcvttsd2si32((A), (B)))
1853f5c7 1579
79fb4764
HJ
/* Macro forms of the scalar double <-> float conversions, used when
   __OPTIMIZE__ is not defined.  Each expansion is wrapped in its own
   parentheses so that the result cast binds to the builtin call
   only, whatever operator follows the invocation.  The maskz forms
   pass the zero vector from _mm_avx512_setzero_ps/_pd () where the
   mask forms pass W, cast to the builtin's operand type as the
   other macros in this header do.  */
#define _mm_cvt_roundsd_ss(A, B, C)		 \
    ((__m128)__builtin_ia32_cvtsd2ss_round((A), (B), (C)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, C)	\
    ((__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, C)	\
    ((__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B),		\
      (__v4sf)_mm_avx512_setzero_ps (), (U), (C)))

#define _mm_cvt_roundss_sd(A, B, C)		 \
    ((__m128d)__builtin_ia32_cvtss2sd_round((A), (B), (C)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, C)	\
    ((__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, C)	\
    ((__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B),		\
      (__v2df)_mm_avx512_setzero_pd (), (U), (C)))
1853f5c7 1599
79fb4764
HJ
/* Macro forms of the scalar getmant intrinsics, used when
   __OPTIMIZE__ is not defined.  C and D are merged into a single
   immediate as ((D)<<2) | (C); the maskz forms pass the zero vector
   from _mm_avx512_setzero_pd/_ps () where the mask forms pass W.  */
#define _mm_getmant_round_sd(X, Y, C, D, R)				\
  ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),	\
					    (__v2df)(__m128d)(Y),	\
					    (int)(((D)<<2) | (C)),	\
					    (R)))

#define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R)			\
  ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X),	\
						 (__v2df)(__m128d)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v2df)(__m128d)(W),	\
						 (__mmask8)(U),\
						 (R)))

#define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R)			\
  ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X),	\
						 (__v2df)(__m128d)(Y),	\
						 (int)(((D)<<2) | (C)),	\
						 (__v2df)(__m128d)_mm_avx512_setzero_pd(), \
						 (__mmask8)(U),\
						 (R)))

#define _mm_getmant_round_ss(X, Y, C, D, R)				\
  ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X),	\
					   (__v4sf)(__m128)(Y),	\
					   (int)(((D)<<2) | (C)),	\
					   (R)))

#define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R)			\
  ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X),	\
						(__v4sf)(__m128)(Y),	\
						(int)(((D)<<2) | (C)),	\
						(__v4sf)(__m128)(W),	\
						(__mmask8)(U),\
						(R)))

#define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R)			\
  ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X),	\
						(__v4sf)(__m128)(Y),	\
						(int)(((D)<<2) | (C)),	\
						(__v4sf)(__m128)_mm_avx512_setzero_ps(), \
						(__mmask8)(U),\
						(R)))
1643
/* Macro forms of the scalar getexp intrinsics, used when
   __OPTIMIZE__ is not defined.  Every expansion is wrapped in its
   own parentheses and every argument is parenthesized, so the result
   cast binds to the builtin call only.  The maskz forms pass the
   zero vector from _mm_avx512_setzero_ps/_pd () where the mask forms
   pass W.  */
#define _mm_getexp_round_ss(A, B, R)					\
  ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A),	\
					    (__v4sf)(__m128)(B), (R)))

#define _mm_mask_getexp_round_ss(W, U, A, B, C)			\
  ((__m128)__builtin_ia32_getexpss_mask_round((A), (B), (W), (U), (C)))

#define _mm_maskz_getexp_round_ss(U, A, B, C)				\
  ((__m128)__builtin_ia32_getexpss_mask_round((A), (B),		\
      (__v4sf)_mm_avx512_setzero_ps(), (U), (C)))

#define _mm_getexp_round_sd(A, B, R)					\
  ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A),	\
					     (__v2df)(__m128d)(B), (R)))

#define _mm_mask_getexp_round_sd(W, U, A, B, C)			\
  ((__m128d)__builtin_ia32_getexpsd_mask_round((A), (B), (W), (U), (C)))

#define _mm_maskz_getexp_round_sd(U, A, B, C)				\
  ((__m128d)__builtin_ia32_getexpsd_mask_round((A), (B),		\
      (__v2df)_mm_avx512_setzero_pd(), (U), (C)))
79fb4764
HJ
1661
/* Single-precision scalar roundscale macros.  All three expand to
   __builtin_ia32_rndscaless_mask_round; they differ only in the fourth
   operand and the mask: the plain form passes a zero vector with mask -1,
   the mask form passes A with mask U, and the maskz form passes a zero
   vector with mask U.  */
#define _mm_roundscale_round_ss(A, B, I, R) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (I), \
     (__v4sf) _mm_avx512_setzero_ps (), (__mmask8) (-1), (int) (R)))
#define _mm_mask_roundscale_round_ss(A, U, B, C, I, R) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (B), (__v4sf) (__m128) (C), (int) (I), \
     (__v4sf) (__m128) (A), (__mmask8) (U), (int) (R)))
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ( \
     (__v4sf) (__m128) (A), (__v4sf) (__m128) (B), (int) (I), \
     (__v4sf) _mm_avx512_setzero_ps (), (__mmask8) (U), (int) (R)))
/* Double-precision scalar roundscale macros.  All three expand to
   __builtin_ia32_rndscalesd_mask_round; they differ only in the fourth
   operand and the mask: the plain form passes a zero vector with mask -1,
   the mask form passes A with mask U, and the maskz form passes a zero
   vector with mask U.  */
#define _mm_roundscale_round_sd(A, B, I, R) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (I), \
     (__v2df) _mm_avx512_setzero_pd (), (__mmask8) (-1), (int) (R)))
#define _mm_mask_roundscale_round_sd(A, U, B, C, I, R) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (B), (__v2df) (__m128d) (C), (int) (I), \
     (__v2df) (__m128d) (A), (__mmask8) (U), (int) (R)))
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ( \
     (__v2df) (__m128d) (A), (__v2df) (__m128d) (B), (int) (I), \
     (__v2df) _mm_avx512_setzero_pd (), (__mmask8) (U), (int) (R)))
1853f5c7 1710
075691af
AI
1711#endif
1712
79fb4764
HJ
/* Masked ss->sd and sd->ss conversion shorthands.  Each one forwards its
   operands unchanged to the corresponding *_cvt_round* intrinsic and
   supplies _MM_FROUND_CUR_DIRECTION as the trailing argument.  */
#define _mm_mask_cvtss_sd(W, U, A, B) \
  _mm_mask_cvt_roundss_sd ((W), (U), (A), (B), \
                           _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_cvtss_sd(U, A, B) \
  _mm_maskz_cvt_roundss_sd ((U), (A), (B), \
                            _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_cvtsd_ss(W, U, A, B) \
  _mm_mask_cvt_roundsd_ss ((W), (U), (A), (B), \
                           _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_cvtsd_ss(U, A, B) \
  _mm_maskz_cvt_roundsd_ss ((U), (A), (B), \
                            _MM_FROUND_CUR_DIRECTION)
50d9ca71 1724
756c5857 1725#ifdef __OPTIMIZE__
79fb4764 1726extern __inline __mmask16
756c5857 1727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1728_kshiftli_mask16 (__mmask16 __A, unsigned int __B)
756c5857 1729{
79fb4764
HJ
1730 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
1731 (__mmask8) __B);
756c5857
AI
1732}
1733
79fb4764 1734extern __inline __mmask16
756c5857 1735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1736_kshiftri_mask16 (__mmask16 __A, unsigned int __B)
756c5857 1737{
79fb4764
HJ
1738 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
1739 (__mmask8) __B);
756c5857
AI
1740}
1741
79fb4764 1742extern __inline __mmask8
756c5857 1743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1744_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
756c5857 1745{
79fb4764
HJ
1746 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
1747 (__v2df) __Y, __P,
1748 (__mmask8) -1, __R);
756c5857
AI
1749}
1750
79fb4764 1751extern __inline __mmask8
756c5857 1752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1753_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
1754 const int __P, const int __R)
756c5857 1755{
79fb4764
HJ
1756 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
1757 (__v2df) __Y, __P,
1758 (__mmask8) __M, __R);
756c5857
AI
1759}
1760
79fb4764 1761extern __inline __mmask8
756c5857 1762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1763_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
756c5857 1764{
79fb4764
HJ
1765 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
1766 (__v4sf) __Y, __P,
1767 (__mmask8) -1, __R);
756c5857
AI
1768}
1769
79fb4764 1770extern __inline __mmask8
756c5857 1771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1772_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
1773 const int __P, const int __R)
756c5857 1774{
79fb4764
HJ
1775 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
1776 (__v4sf) __Y, __P,
1777 (__mmask8) __M, __R);
756c5857 1778}
79fb4764 1779
756c5857 1780#else
79fb4764
HJ
/* Macro forms of the 16-bit mask shifts: X is cast to __mmask16 and Y to
   __mmask8 before being passed to the builtin.  */
#define _kshiftli_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))

#define _kshiftri_mask16(X, Y) \
  ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) (X), \
                                          (__mmask8) (Y)))
756c5857 1786
79fb4764
HJ
/* Macro forms of the scalar compare-into-mask intrinsics.  The mask
   operand is cast to __mmask8 and the trailing R argument is parenthesized
   so that every macro parameter is isolated from the surrounding
   expansion.  The unmasked forms pass an all-ones __mmask8.  */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         (R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         (R)))

#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         (R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         (R)))
1806
1807#endif
1808
1809extern __inline unsigned char
756c5857 1810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1811_kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
756c5857 1812{
79fb4764
HJ
1813 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
1814 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
756c5857
AI
1815}
1816
79fb4764 1817extern __inline unsigned char
756c5857 1818__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1819_kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
756c5857 1820{
79fb4764
HJ
1821 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
1822 (__mmask16) __B);
756c5857
AI
1823}
1824
79fb4764 1825extern __inline unsigned char
075691af 1826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1827_kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
075691af 1828{
79fb4764
HJ
1829 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
1830 (__mmask16) __B);
075691af
AI
1831}
1832
79fb4764 1833extern __inline unsigned int
f4ee3a9e 1834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1835_cvtmask16_u32 (__mmask16 __A)
f4ee3a9e 1836{
79fb4764 1837 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
f4ee3a9e
UB
1838}
1839
79fb4764 1840extern __inline __mmask16
f4ee3a9e 1841__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1842_cvtu32_mask16 (unsigned int __A)
f4ee3a9e 1843{
79fb4764 1844 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
f4ee3a9e
UB
1845}
1846
79fb4764 1847extern __inline __mmask16
075691af 1848__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1849_load_mask16 (__mmask16 *__A)
075691af 1850{
79fb4764 1851 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
075691af
AI
1852}
1853
79fb4764 1854extern __inline void
f4ee3a9e 1855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1856_store_mask16 (__mmask16 *__A, __mmask16 __B)
f4ee3a9e 1857{
79fb4764 1858 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
f4ee3a9e
UB
1859}
1860
79fb4764 1861extern __inline __mmask16
f4ee3a9e 1862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1863_kand_mask16 (__mmask16 __A, __mmask16 __B)
f4ee3a9e 1864{
79fb4764 1865 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
f4ee3a9e
UB
1866}
1867
79fb4764 1868extern __inline __mmask16
756c5857 1869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1870_kandn_mask16 (__mmask16 __A, __mmask16 __B)
756c5857 1871{
79fb4764
HJ
1872 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
1873 (__mmask16) __B);
756c5857
AI
1874}
1875
79fb4764 1876extern __inline __mmask16
756c5857 1877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1878_kor_mask16 (__mmask16 __A, __mmask16 __B)
756c5857 1879{
79fb4764 1880 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
756c5857
AI
1881}
1882
79fb4764 1883extern __inline __mmask16
756c5857 1884__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1885_kxnor_mask16 (__mmask16 __A, __mmask16 __B)
756c5857 1886{
79fb4764 1887 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
756c5857
AI
1888}
1889
79fb4764 1890extern __inline __mmask16
756c5857 1891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1892_kxor_mask16 (__mmask16 __A, __mmask16 __B)
756c5857 1893{
79fb4764 1894 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
756c5857
AI
1895}
1896
79fb4764 1897extern __inline __mmask16
756c5857 1898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1899_knot_mask16 (__mmask16 __A)
756c5857 1900{
79fb4764 1901 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
756c5857
AI
1902}
1903
79fb4764 1904extern __inline __mmask16
756c5857 1905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1906_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
756c5857 1907{
79fb4764 1908 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
756c5857
AI
1909}
1910
79fb4764 1911#ifdef __OPTIMIZE__
075691af
AI
1912extern __inline __m128d
1913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1914_mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
075691af 1915{
79fb4764
HJ
1916 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
1917 (__v2df) __B,
1918 __R);
075691af
AI
1919}
1920
d7a33a4c
JK
1921extern __inline __m128d
1922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1923_mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1924 __m128d __B, const int __R)
d7a33a4c 1925{
79fb4764
HJ
1926 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
1927 (__v2df) __B,
d7a33a4c 1928 (__v2df) __W,
79fb4764 1929 (__mmask8) __U, __R);
d7a33a4c
JK
1930}
1931
1932extern __inline __m128d
1933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1934_mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1935 const int __R)
d7a33a4c 1936{
79fb4764
HJ
1937 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
1938 (__v2df) __B,
1939 (__v2df)
fd79b414 1940 _mm_avx512_setzero_pd (),
79fb4764 1941 (__mmask8) __U, __R);
d7a33a4c
JK
1942}
1943
075691af
AI
1944extern __inline __m128
1945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1946_mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
075691af 1947{
79fb4764
HJ
1948 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
1949 (__v4sf) __B,
1950 __R);
075691af
AI
1951}
1952
d7a33a4c
JK
1953extern __inline __m128
1954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1955_mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1956 __m128 __B, const int __R)
1957{
1958 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
1959 (__v4sf) __B,
d7a33a4c 1960 (__v4sf) __W,
79fb4764 1961 (__mmask8) __U, __R);
d7a33a4c
JK
1962}
1963
1964extern __inline __m128
1965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1966_mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1967 const int __R)
d7a33a4c 1968{
79fb4764
HJ
1969 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
1970 (__v4sf) __B,
1971 (__v4sf)
fd79b414 1972 _mm_avx512_setzero_ps (),
79fb4764 1973 (__mmask8) __U, __R);
d7a33a4c
JK
1974}
1975
79fb4764 1976extern __inline __m128d
756c5857 1977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 1978_mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
756c5857 1979{
79fb4764
HJ
1980 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
1981 (__v2df) __B,
1982 __R);
756c5857
AI
1983}
1984
79fb4764 1985extern __inline __m128d
756c5857 1986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1987_mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1988 __m128d __B, const int __R)
756c5857 1989{
79fb4764
HJ
1990 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
1991 (__v2df) __B,
1992 (__v2df) __W,
1993 (__mmask8) __U, __R);
756c5857
AI
1994}
1995
79fb4764 1996extern __inline __m128d
756c5857 1997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
1998_mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1999 const int __R)
756c5857 2000{
79fb4764
HJ
2001 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
2002 (__v2df) __B,
2003 (__v2df)
fd79b414 2004 _mm_avx512_setzero_pd (),
79fb4764 2005 (__mmask8) __U, __R);
756c5857
AI
2006}
2007
79fb4764 2008extern __inline __m128
756c5857 2009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2010_mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
756c5857 2011{
79fb4764
HJ
2012 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
2013 (__v4sf) __B,
2014 __R);
756c5857
AI
2015}
2016
79fb4764 2017extern __inline __m128
756c5857 2018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2019_mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2020 __m128 __B, const int __R)
756c5857 2021{
79fb4764
HJ
2022 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
2023 (__v4sf) __B,
2024 (__v4sf) __W,
2025 (__mmask8) __U, __R);
756c5857
AI
2026}
2027
79fb4764 2028extern __inline __m128
756c5857 2029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2030_mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2031 const int __R)
756c5857 2032{
79fb4764
HJ
2033 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
2034 (__v4sf) __B,
2035 (__v4sf)
fd79b414 2036 _mm_avx512_setzero_ps (),
79fb4764 2037 (__mmask8) __U, __R);
756c5857
AI
2038}
2039
79fb4764
HJ
2040#else
/* Macro forms of the scalar max/min intrinsics with an explicit trailing
   C argument.  Each expansion is enclosed in parentheses so that a postfix
   operator following the macro call applies to the converted result, and
   each argument is parenthesized and cast to the type the inline function
   forms use.  The maskz forms pass the value of
   _mm_avx512_setzero_pd ()/_ps () as the third operand.  */
#define _mm_max_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_round ((__v2df) (__m128d) (A), \
                                         (__v2df) (__m128d) (B), \
                                         (C)))

#define _mm_mask_max_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) (__m128d) (A), \
                                              (__v2df) (__m128d) (B), \
                                              (__v2df) (__m128d) (W), \
                                              (__mmask8) (U), \
                                              (C)))

#define _mm_maskz_max_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) (__m128d) (A), \
                                              (__v2df) (__m128d) (B), \
                                              (__v2df) \
                                                _mm_avx512_setzero_pd (), \
                                              (__mmask8) (U), \
                                              (C)))

#define _mm_max_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_maxss_round ((__v4sf) (__m128) (A), \
                                        (__v4sf) (__m128) (B), \
                                        (C)))

#define _mm_mask_max_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((__v4sf) (__m128) (A), \
                                             (__v4sf) (__m128) (B), \
                                             (__v4sf) (__m128) (W), \
                                             (__mmask8) (U), \
                                             (C)))

#define _mm_maskz_max_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_maxss_mask_round ((__v4sf) (__m128) (A), \
                                             (__v4sf) (__m128) (B), \
                                             (__v4sf) \
                                               _mm_avx512_setzero_ps (), \
                                             (__mmask8) (U), \
                                             (C)))

#define _mm_min_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_minsd_round ((__v2df) (__m128d) (A), \
                                         (__v2df) (__m128d) (B), \
                                         (C)))

#define _mm_mask_min_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((__v2df) (__m128d) (A), \
                                              (__v2df) (__m128d) (B), \
                                              (__v2df) (__m128d) (W), \
                                              (__mmask8) (U), \
                                              (C)))

#define _mm_maskz_min_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_minsd_mask_round ((__v2df) (__m128d) (A), \
                                              (__v2df) (__m128d) (B), \
                                              (__v2df) \
                                                _mm_avx512_setzero_pd (), \
                                              (__mmask8) (U), \
                                              (C)))

#define _mm_min_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_minss_round ((__v4sf) (__m128) (A), \
                                        (__v4sf) (__m128) (B), \
                                        (C)))

#define _mm_mask_min_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((__v4sf) (__m128) (A), \
                                             (__v4sf) (__m128) (B), \
                                             (__v4sf) (__m128) (W), \
                                             (__mmask8) (U), \
                                             (C)))

#define _mm_maskz_min_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_minss_mask_round ((__v4sf) (__m128) (A), \
                                             (__v4sf) (__m128) (B), \
                                             (__v4sf) \
                                               _mm_avx512_setzero_ps (), \
                                             (__mmask8) (U), \
                                             (C)))
79fb4764
HJ
2076
2077#endif
2078
2079#ifdef __OPTIMIZE__
075691af
AI
2080extern __inline __m128d
2081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2082_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
075691af 2083{
79fb4764
HJ
2084 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
2085 (__v2df) __A,
2086 (__v2df) __B,
2087 __R);
b10bc0d6
OM
2088}
2089
79fb4764 2090extern __inline __m128
b10bc0d6 2091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2092_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
b10bc0d6 2093{
79fb4764
HJ
2094 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
2095 (__v4sf) __A,
2096 (__v4sf) __B,
2097 __R);
b10bc0d6
OM
2098}
2099
2100extern __inline __m128d
2101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2102_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
b10bc0d6 2103{
79fb4764
HJ
2104 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
2105 (__v2df) __A,
2106 -(__v2df) __B,
2107 __R);
075691af
AI
2108}
2109
2110extern __inline __m128
2111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2112_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
075691af 2113{
79fb4764
HJ
2114 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
2115 (__v4sf) __A,
2116 -(__v4sf) __B,
2117 __R);
2118}
2119
2120extern __inline __m128d
2121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2122_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
2123{
2124 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
2125 -(__v2df) __A,
2126 (__v2df) __B,
2127 __R);
b10bc0d6
OM
2128}
2129
2130extern __inline __m128
2131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2132_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
b10bc0d6 2133{
79fb4764
HJ
2134 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
2135 -(__v4sf) __A,
2136 (__v4sf) __B,
2137 __R);
2138}
2139
2140extern __inline __m128d
2141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2142_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
2143{
2144 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
2145 -(__v2df) __A,
2146 -(__v2df) __B,
2147 __R);
b10bc0d6
OM
2148}
2149
2150extern __inline __m128
2151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2152_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
b10bc0d6 2153{
79fb4764
HJ
2154 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
2155 -(__v4sf) __A,
2156 -(__v4sf) __B,
2157 __R);
075691af 2158}
756c5857 2159#else
79fb4764
HJ
/* Macro forms of the unmasked scalar FMA intrinsics with an explicit R
   argument.  Each expansion is enclosed in parentheses so that a postfix
   operator following the macro call applies to the converted result, and
   each vector argument is parenthesized and cast before use.  The fmsub
   forms negate C, the fnmadd forms negate B, and the fnmsub forms negate
   both.  */
#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) (__m128d) (A), \
                                             (__v2df) (__m128d) (B), \
                                             (__v2df) (__m128d) (C), \
                                             (R)))

#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) (__m128) (A), \
                                            (__v4sf) (__m128) (B), \
                                            (__v4sf) (__m128) (C), \
                                            (R)))

#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) (__m128d) (A), \
                                             (__v2df) (__m128d) (B), \
                                             -(__v2df) (__m128d) (C), \
                                             (R)))

#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) (__m128) (A), \
                                            (__v4sf) (__m128) (B), \
                                            -(__v4sf) (__m128) (C), \
                                            (R)))

#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) (__m128d) (A), \
                                             -(__v2df) (__m128d) (B), \
                                             (__v2df) (__m128d) (C), \
                                             (R)))

#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) (__m128) (A), \
                                            -(__v4sf) (__m128) (B), \
                                            (__v4sf) (__m128) (C), \
                                            (R)))

#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) (__m128d) (A), \
                                             -(__v2df) (__m128d) (B), \
                                             -(__v2df) (__m128d) (C), \
                                             (R)))

#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) (__m128) (A), \
                                            -(__v4sf) (__m128) (B), \
                                            -(__v4sf) (__m128) (C), \
                                            (R)))
756c5857
AI
2183#endif
2184
79fb4764 2185extern __inline __m128d
756c5857 2186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2187_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
756c5857 2188{
79fb4764
HJ
2189 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2190 (__v2df) __A,
2191 (__v2df) __B,
2192 (__mmask8) __U,
2193 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2194}
2195
79fb4764 2196extern __inline __m128
756c5857 2197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2198_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 2199{
79fb4764
HJ
2200 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2201 (__v4sf) __A,
2202 (__v4sf) __B,
2203 (__mmask8) __U,
2204 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2205}
2206
79fb4764 2207extern __inline __m128d
756c5857 2208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2209_mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
756c5857 2210{
79fb4764
HJ
2211 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
2212 (__v2df) __A,
2213 (__v2df) __B,
2214 (__mmask8) __U,
2215 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2216}
2217
79fb4764 2218extern __inline __m128
756c5857 2219__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2220_mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
756c5857 2221{
79fb4764
HJ
2222 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
2223 (__v4sf) __A,
2224 (__v4sf) __B,
2225 (__mmask8) __U,
2226 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2227}
2228
79fb4764 2229extern __inline __m128d
756c5857 2230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2231_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
756c5857 2232{
79fb4764
HJ
2233 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2234 (__v2df) __A,
2235 (__v2df) __B,
2236 (__mmask8) __U,
2237 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2238}
2239
79fb4764 2240extern __inline __m128
756c5857 2241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2242_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
756c5857 2243{
79fb4764
HJ
2244 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2245 (__v4sf) __A,
2246 (__v4sf) __B,
2247 (__mmask8) __U,
2248 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2249}
2250
79fb4764 2251extern __inline __m128d
756c5857 2252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2253_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
756c5857 2254{
79fb4764
HJ
2255 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2256 (__v2df) __A,
2257 -(__v2df) __B,
2258 (__mmask8) __U,
2259 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2260}
2261
79fb4764 2262extern __inline __m128
756c5857 2263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2264_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 2265{
79fb4764
HJ
2266 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2267 (__v4sf) __A,
2268 -(__v4sf) __B,
2269 (__mmask8) __U,
2270 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2271}
2272
79fb4764 2273extern __inline __m128d
756c5857 2274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2275_mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
756c5857 2276{
79fb4764
HJ
2277 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
2278 (__v2df) __A,
2279 (__v2df) __B,
2280 (__mmask8) __U,
2281 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2282}
2283
79fb4764 2284extern __inline __m128
756c5857 2285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2286_mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
756c5857 2287{
79fb4764
HJ
2288 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
2289 (__v4sf) __A,
2290 (__v4sf) __B,
2291 (__mmask8) __U,
2292 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2293}
2294
79fb4764 2295extern __inline __m128d
756c5857 2296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2297_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
756c5857 2298{
79fb4764
HJ
2299 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2300 (__v2df) __A,
2301 -(__v2df) __B,
2302 (__mmask8) __U,
2303 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2304}
2305
79fb4764 2306extern __inline __m128
756c5857 2307__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2308_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
756c5857 2309{
79fb4764
HJ
2310 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2311 (__v4sf) __A,
2312 -(__v4sf) __B,
2313 (__mmask8) __U,
2314 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2315}
2316
79fb4764 2317extern __inline __m128d
756c5857 2318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2319_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
756c5857 2320{
79fb4764
HJ
2321 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2322 -(__v2df) __A,
2323 (__v2df) __B,
2324 (__mmask8) __U,
2325 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2326}
2327
79fb4764 2328extern __inline __m128
756c5857 2329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2330_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 2331{
79fb4764
HJ
2332 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2333 -(__v4sf) __A,
2334 (__v4sf) __B,
2335 (__mmask8) __U,
2336 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2337}
2338
79fb4764 2339extern __inline __m128d
756c5857 2340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2341_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
756c5857 2342{
79fb4764
HJ
2343 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
2344 -(__v2df) __A,
2345 (__v2df) __B,
2346 (__mmask8) __U,
2347 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2348}
2349
79fb4764 2350extern __inline __m128
756c5857 2351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2352_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
756c5857 2353{
79fb4764
HJ
2354 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
2355 -(__v4sf) __A,
2356 (__v4sf) __B,
2357 (__mmask8) __U,
2358 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2359}
2360
79fb4764 2361extern __inline __m128d
756c5857 2362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2363_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
756c5857 2364{
79fb4764
HJ
2365 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2366 -(__v2df) __A,
2367 (__v2df) __B,
2368 (__mmask8) __U,
2369 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2370}
2371
79fb4764 2372extern __inline __m128
756c5857 2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2374_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
756c5857 2375{
79fb4764
HJ
2376 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2377 -(__v4sf) __A,
2378 (__v4sf) __B,
2379 (__mmask8) __U,
2380 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2381}
2382
79fb4764 2383extern __inline __m128d
756c5857 2384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2385_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
756c5857 2386{
79fb4764
HJ
2387 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2388 -(__v2df) __A,
2389 -(__v2df) __B,
2390 (__mmask8) __U,
2391 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2392}
2393
79fb4764 2394extern __inline __m128
756c5857 2395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2396_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 2397{
79fb4764
HJ
2398 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2399 -(__v4sf) __A,
2400 -(__v4sf) __B,
2401 (__mmask8) __U,
2402 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2403}
2404
79fb4764 2405extern __inline __m128d
756c5857 2406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2407_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
756c5857 2408{
79fb4764
HJ
2409 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
2410 -(__v2df) __A,
2411 (__v2df) __B,
2412 (__mmask8) __U,
2413 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2414}
2415
79fb4764 2416extern __inline __m128
756c5857 2417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2418_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
756c5857 2419{
79fb4764
HJ
2420 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
2421 -(__v4sf) __A,
2422 (__v4sf) __B,
2423 (__mmask8) __U,
2424 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2425}
2426
79fb4764 2427extern __inline __m128d
756c5857 2428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2429_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
756c5857 2430{
79fb4764
HJ
2431 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2432 -(__v2df) __A,
2433 -(__v2df) __B,
2434 (__mmask8) __U,
2435 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2436}
2437
79fb4764 2438extern __inline __m128
756c5857 2439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2440_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
756c5857 2441{
79fb4764
HJ
2442 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2443 -(__v4sf) __A,
2444 -(__v4sf) __B,
2445 (__mmask8) __U,
2446 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
2447}
2448
79fb4764
HJ
2449#ifdef __OPTIMIZE__
2450extern __inline __m128d
756c5857 2451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2452_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2453 const int __R)
756c5857 2454{
79fb4764
HJ
2455 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2456 (__v2df) __A,
2457 (__v2df) __B,
2458 (__mmask8) __U, __R);
756c5857
AI
2459}
2460
79fb4764 2461extern __inline __m128
756c5857 2462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2463_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2464 const int __R)
756c5857 2465{
79fb4764
HJ
2466 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2467 (__v4sf) __A,
2468 (__v4sf) __B,
2469 (__mmask8) __U, __R);
756c5857
AI
2470}
2471
79fb4764 2472extern __inline __m128d
756c5857 2473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2474_mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
2475 const int __R)
756c5857 2476{
79fb4764
HJ
2477 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
2478 (__v2df) __A,
2479 (__v2df) __B,
2480 (__mmask8) __U, __R);
756c5857
AI
2481}
2482
79fb4764 2483extern __inline __m128
756c5857 2484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2485_mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
2486 const int __R)
756c5857 2487{
79fb4764
HJ
2488 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
2489 (__v4sf) __A,
2490 (__v4sf) __B,
2491 (__mmask8) __U, __R);
756c5857
AI
2492}
2493
79fb4764 2494extern __inline __m128d
756c5857 2495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2496_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
2497 const int __R)
756c5857 2498{
79fb4764
HJ
2499 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2500 (__v2df) __A,
2501 (__v2df) __B,
2502 (__mmask8) __U, __R);
756c5857
AI
2503}
2504
79fb4764 2505extern __inline __m128
756c5857 2506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2507_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
2508 const int __R)
756c5857 2509{
79fb4764
HJ
2510 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2511 (__v4sf) __A,
2512 (__v4sf) __B,
2513 (__mmask8) __U, __R);
756c5857
AI
2514}
2515
79fb4764 2516extern __inline __m128d
756c5857 2517__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2518_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2519 const int __R)
756c5857 2520{
79fb4764
HJ
2521 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2522 (__v2df) __A,
2523 -(__v2df) __B,
2524 (__mmask8) __U, __R);
756c5857
AI
2525}
2526
79fb4764 2527extern __inline __m128
756c5857 2528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2529_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2530 const int __R)
756c5857 2531{
79fb4764
HJ
2532 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2533 (__v4sf) __A,
2534 -(__v4sf) __B,
756c5857
AI
2535 (__mmask8) __U, __R);
2536}
2537
79fb4764 2538extern __inline __m128d
756c5857 2539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2540_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
2541 const int __R)
756c5857 2542{
79fb4764
HJ
2543 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
2544 (__v2df) __A,
2545 (__v2df) __B,
2546 (__mmask8) __U, __R);
756c5857
AI
2547}
2548
79fb4764 2549extern __inline __m128
756c5857 2550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2551_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
2552 const int __R)
756c5857 2553{
79fb4764
HJ
2554 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
2555 (__v4sf) __A,
2556 (__v4sf) __B,
2557 (__mmask8) __U, __R);
756c5857
AI
2558}
2559
79fb4764 2560extern __inline __m128d
756c5857 2561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2562_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
2563 const int __R)
756c5857 2564{
79fb4764
HJ
2565 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2566 (__v2df) __A,
2567 -(__v2df) __B,
2568 (__mmask8) __U, __R);
756c5857
AI
2569}
2570
79fb4764 2571extern __inline __m128
756c5857 2572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2573_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
2574 const int __R)
756c5857 2575{
79fb4764
HJ
2576 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2577 (__v4sf) __A,
2578 -(__v4sf) __B,
2579 (__mmask8) __U, __R);
756c5857
AI
2580}
2581
79fb4764 2582extern __inline __m128d
756c5857 2583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2584_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2585 const int __R)
756c5857 2586{
79fb4764
HJ
2587 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2588 -(__v2df) __A,
2589 (__v2df) __B,
2590 (__mmask8) __U, __R);
756c5857
AI
2591}
2592
79fb4764 2593extern __inline __m128
756c5857 2594__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2595_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2596 const int __R)
756c5857 2597{
79fb4764
HJ
2598 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2599 -(__v4sf) __A,
2600 (__v4sf) __B,
756c5857
AI
2601 (__mmask8) __U, __R);
2602}
2603
79fb4764 2604extern __inline __m128d
756c5857 2605__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2606_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
2607 const int __R)
756c5857 2608{
79fb4764
HJ
2609 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
2610 -(__v2df) __A,
2611 (__v2df) __B,
2612 (__mmask8) __U, __R);
756c5857
AI
2613}
2614
79fb4764 2615extern __inline __m128
756c5857 2616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2617_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
2618 const int __R)
756c5857 2619{
79fb4764
HJ
2620 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
2621 -(__v4sf) __A,
2622 (__v4sf) __B,
2623 (__mmask8) __U, __R);
756c5857
AI
2624}
2625
79fb4764 2626extern __inline __m128d
756c5857 2627__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2628_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
2629 const int __R)
756c5857 2630{
79fb4764
HJ
2631 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2632 -(__v2df) __A,
2633 (__v2df) __B,
2634 (__mmask8) __U, __R);
756c5857 2635}
756c5857 2636
79fb4764 2637extern __inline __m128
756c5857 2638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2639_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
2640 const int __R)
756c5857 2641{
79fb4764
HJ
2642 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2643 -(__v4sf) __A,
2644 (__v4sf) __B,
2645 (__mmask8) __U, __R);
756c5857
AI
2646}
2647
79fb4764 2648extern __inline __m128d
756c5857 2649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2650_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2651 const int __R)
756c5857 2652{
79fb4764
HJ
2653 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2654 -(__v2df) __A,
2655 -(__v2df) __B,
2656 (__mmask8) __U, __R);
756c5857
AI
2657}
2658
79fb4764 2659extern __inline __m128
756c5857 2660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2661_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2662 const int __R)
756c5857 2663{
79fb4764
HJ
2664 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2665 -(__v4sf) __A,
2666 -(__v4sf) __B,
756c5857
AI
2667 (__mmask8) __U, __R);
2668}
2669
79fb4764 2670extern __inline __m128d
756c5857 2671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2672_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
2673 const int __R)
756c5857 2674{
79fb4764
HJ
2675 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
2676 -(__v2df) __A,
2677 (__v2df) __B,
2678 (__mmask8) __U, __R);
756c5857
AI
2679}
2680
79fb4764 2681extern __inline __m128
756c5857 2682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2683_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
2684 const int __R)
756c5857 2685{
79fb4764
HJ
2686 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
2687 -(__v4sf) __A,
2688 (__v4sf) __B,
2689 (__mmask8) __U, __R);
756c5857
AI
2690}
2691
79fb4764 2692extern __inline __m128d
756c5857 2693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2694_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
2695 const int __R)
756c5857 2696{
79fb4764
HJ
2697 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2698 -(__v2df) __A,
2699 -(__v2df) __B,
2700 (__mmask8) __U, __R);
756c5857
AI
2701}
2702
79fb4764 2703extern __inline __m128
756c5857 2704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2705_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
2706 const int __R)
756c5857 2707{
79fb4764
HJ
2708 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2709 -(__v4sf) __A,
2710 -(__v4sf) __B,
2711 (__mmask8) __U, __R);
756c5857 2712}
79fb4764
HJ
2713#else
/* Macro forms of the scalar masked FMA rounding intrinsics (the #else
   branch of a conditional whose opening #if is outside this excerpt --
   presumably __OPTIMIZE__; confirm).  Each macro reorders its arguments
   into the builtin's (A, B, C, U, R) order; the fmsub/fnmadd/fnmsub
   variants negate B and/or C, or use the vfmsub*_mask3 builtins.
   Every expansion is wrapped in parentheses so that a postfix operator
   following the macro applies to the cast result rather than to the
   builtin call.  */
#define _mm_mask_fmadd_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R))

#define _mm_mask_fmadd_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R))

#define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R))

#define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R))

#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R))

#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R))

#define _mm_mask_fmsub_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R))

#define _mm_mask_fmsub_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R))

#define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R))

#define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R))

#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R))

#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R))

#define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R))

#define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R))

#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R))

#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R))

#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R))

#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R))

#define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R))

#define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R))

#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \
  ((__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R))

#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \
  ((__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R))

#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R))

#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R))
2785#endif
2786
2787#ifdef __OPTIMIZE__
2788extern __inline int
2789__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2790_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
2791{
2792 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
2793}
2794
2795extern __inline int
2796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
2798{
2799 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
756c5857 2800}
79fb4764
HJ
2801#else
/* Macro forms used when __OPTIMIZE__ is not defined: forward all four
   arguments to the vcomiss / vcomisd builtins.  */
#define _mm_comi_round_ss(A, B, C, D) \
  (__builtin_ia32_vcomiss (A, B, C, D))
#define _mm_comi_round_sd(A, B, C, D) \
  (__builtin_ia32_vcomisd (A, B, C, D))
2806#endif
756c5857 2807
075691af
AI
2808extern __inline __m128d
2809__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2810_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
075691af 2811{
79fb4764
HJ
2812 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
2813 (__v2df) __B,
2814 (__v2df) __W,
2815 (__mmask8) __U,
2816 _MM_FROUND_CUR_DIRECTION);
075691af
AI
2817}
2818
f4ee3a9e
UB
2819extern __inline __m128d
2820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2821_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
2822{
2823 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
2824 (__v2df) __B,
2825 (__v2df)
fd79b414 2826 _mm_avx512_setzero_pd (),
79fb4764
HJ
2827 (__mmask8) __U,
2828 _MM_FROUND_CUR_DIRECTION);
2829}
2830
2831extern __inline __m128
2832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2833_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
2834{
2835 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
2836 (__v4sf) __B,
2837 (__v4sf) __W,
2838 (__mmask8) __U,
2839 _MM_FROUND_CUR_DIRECTION);
2840}
2841
2842extern __inline __m128
2843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2844_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
2845{
2846 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
2847 (__v4sf) __B,
2848 (__v4sf)
fd79b414 2849 _mm_avx512_setzero_ps (),
79fb4764
HJ
2850 (__mmask8) __U,
2851 _MM_FROUND_CUR_DIRECTION);
2852}
2853
2854extern __inline __m128d
2855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2856_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
2857{
2858 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
2859 (__v2df) __B,
2860 (__v2df) __W,
2861 (__mmask8) __U,
2862 _MM_FROUND_CUR_DIRECTION);
2863}
2864
2865extern __inline __m128d
2866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2867_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
2868{
2869 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
2870 (__v2df) __B,
2871 (__v2df)
fd79b414 2872 _mm_avx512_setzero_pd (),
79fb4764
HJ
2873 (__mmask8) __U,
2874 _MM_FROUND_CUR_DIRECTION);
2875}
2876
2877extern __inline __m128
2878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2879_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
2880{
2881 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
2882 (__v4sf) __B,
2883 (__v4sf) __W,
2884 (__mmask8) __U,
2885 _MM_FROUND_CUR_DIRECTION);
2886}
2887
2888extern __inline __m128
2889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2890_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
2891{
2892 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
2893 (__v4sf) __B,
2894 (__v4sf)
fd79b414 2895 _mm_avx512_setzero_ps (),
79fb4764
HJ
2896 (__mmask8) __U,
2897 _MM_FROUND_CUR_DIRECTION);
2898}
2899
2900extern __inline __m128d
2901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2902_mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
2903 __m128d __B)
f4ee3a9e
UB
2904{
2905 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2906 (__v2df) __B,
2907 (__v2df) __W,
79fb4764
HJ
2908 (__mmask8) __U,
2909 _MM_FROUND_CUR_DIRECTION);
f4ee3a9e
UB
2910}
2911
2912extern __inline __m128d
2913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2914_mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
f4ee3a9e
UB
2915{
2916 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2917 (__v2df) __B,
2918 (__v2df)
fd79b414 2919 _mm_avx512_setzero_pd (),
79fb4764
HJ
2920 (__mmask8) __U,
2921 _MM_FROUND_CUR_DIRECTION);
075691af
AI
2922}
2923
f4ee3a9e
UB
2924extern __inline __m128
2925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2926_mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
2927 __m128 __B)
f4ee3a9e
UB
2928{
2929 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2930 (__v4sf) __B,
2931 (__v4sf) __W,
79fb4764
HJ
2932 (__mmask8) __U,
2933 _MM_FROUND_CUR_DIRECTION);
f4ee3a9e
UB
2934}
2935
2936extern __inline __m128
2937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2938_mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
f4ee3a9e
UB
2939{
2940 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2941 (__v4sf) __B,
2942 (__v4sf)
fd79b414 2943 _mm_avx512_setzero_ps (),
79fb4764
HJ
2944 (__mmask8) __U,
2945 _MM_FROUND_CUR_DIRECTION);
075691af
AI
2946}
2947
f4ee3a9e
UB
2948extern __inline __m128d
2949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2950_mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
2951 __m128d __B)
f4ee3a9e
UB
2952{
2953 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2954 (__v2df) __B,
2955 (__v2df) __W,
79fb4764
HJ
2956 (__mmask8) __U,
2957 _MM_FROUND_CUR_DIRECTION);
f4ee3a9e
UB
2958}
2959
2960extern __inline __m128d
2961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2962_mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
f4ee3a9e
UB
2963{
2964 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2965 (__v2df) __B,
2966 (__v2df)
fd79b414 2967 _mm_avx512_setzero_pd (),
79fb4764
HJ
2968 (__mmask8) __U,
2969 _MM_FROUND_CUR_DIRECTION);
075691af
AI
2970}
2971
f4ee3a9e
UB
2972extern __inline __m128
2973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
2974_mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
2975 __m128 __B)
f4ee3a9e
UB
2976{
2977 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2978 (__v4sf) __B,
2979 (__v4sf) __W,
79fb4764
HJ
2980 (__mmask8) __U,
2981 _MM_FROUND_CUR_DIRECTION);
f4ee3a9e
UB
2982}
2983
2984extern __inline __m128
2985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2986_mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
f4ee3a9e
UB
2987{
2988 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2989 (__v4sf) __B,
2990 (__v4sf)
fd79b414 2991 _mm_avx512_setzero_ps (),
79fb4764
HJ
2992 (__mmask8) __U,
2993 _MM_FROUND_CUR_DIRECTION);
f4ee3a9e
UB
2994}
2995
79fb4764 2996extern __inline __m128d
756c5857 2997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 2998_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
756c5857 2999{
79fb4764
HJ
3000 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
3001 (__v2df) __B,
3002 (__v2df) __W,
3003 (__mmask8) __U,
3004 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3005}
3006
79fb4764 3007extern __inline __m128d
756c5857 3008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3009_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
756c5857 3010{
79fb4764
HJ
3011 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
3012 (__v2df) __B,
3013 (__v2df)
fd79b414 3014 _mm_avx512_setzero_pd (),
79fb4764
HJ
3015 (__mmask8) __U,
3016 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3017}
3018
79fb4764 3019extern __inline __m128
756c5857 3020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3021_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 3022{
79fb4764
HJ
3023 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
3024 (__v4sf) __B,
3025 (__v4sf) __W,
3026 (__mmask8) __U,
3027 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3028}
3029
79fb4764 3030extern __inline __m128
756c5857 3031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3032_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
756c5857 3033{
79fb4764
HJ
3034 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
3035 (__v4sf) __B,
3036 (__v4sf)
fd79b414 3037 _mm_avx512_setzero_ps (),
79fb4764
HJ
3038 (__mmask8) __U,
3039 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3040}
3041
79fb4764 3042extern __inline __m128d
756c5857 3043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3044_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
756c5857 3045{
79fb4764
HJ
3046 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
3047 (__v2df) __B,
3048 (__v2df) __W,
3049 (__mmask8) __U,
3050 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3051}
3052
79fb4764 3053extern __inline __m128d
756c5857 3054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3055_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
756c5857 3056{
79fb4764
HJ
3057 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
3058 (__v2df) __B,
3059 (__v2df)
fd79b414 3060 _mm_avx512_setzero_pd (),
79fb4764
HJ
3061 (__mmask8) __U,
3062 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3063}
3064
79fb4764 3065extern __inline __m128
756c5857 3066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3067_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 3068{
79fb4764
HJ
3069 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
3070 (__v4sf) __B,
3071 (__v4sf) __W,
3072 (__mmask8) __U,
3073 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3074}
3075
79fb4764 3076extern __inline __m128
756c5857 3077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3078_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
756c5857 3079{
79fb4764
HJ
3080 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
3081 (__v4sf) __B,
3082 (__v4sf)
fd79b414 3083 _mm_avx512_setzero_ps (),
79fb4764
HJ
3084 (__mmask8) __U,
3085 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3086}
3087
79fb4764 3088extern __inline __m128d
756c5857 3089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3090_mm_scalef_sd (__m128d __A, __m128d __B)
756c5857 3091{
79fb4764
HJ
3092 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3093 (__v2df) __B,
3094 (__v2df)
fd79b414 3095 _mm_avx512_setzero_pd (),
79fb4764
HJ
3096 (__mmask8) -1,
3097 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3098}
3099
79fb4764 3100extern __inline __m128
756c5857 3101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3102_mm_scalef_ss (__m128 __A, __m128 __B)
756c5857 3103{
79fb4764
HJ
3104 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3105 (__v4sf) __B,
3106 (__v4sf)
fd79b414 3107 _mm_avx512_setzero_ps (),
79fb4764
HJ
3108 (__mmask8) -1,
3109 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3110}
3111
79fb4764
HJ
3112#ifdef __x86_64__
3113extern __inline __m128
756c5857 3114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3115_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
756c5857 3116{
79fb4764
HJ
3117 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
3118 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3119}
3120
79fb4764 3121extern __inline __m128d
756c5857 3122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3123_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
756c5857 3124{
79fb4764
HJ
3125 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
3126 _MM_FROUND_CUR_DIRECTION);
756c5857 3127}
79fb4764 3128#endif
756c5857 3129
79fb4764
HJ
3130extern __inline __m128
3131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3132_mm_cvtu32_ss (__m128 __A, unsigned __B)
3133{
3134 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
3135 _MM_FROUND_CUR_DIRECTION);
3136}
756c5857 3137
79fb4764
HJ
3138#ifdef __OPTIMIZE__
3139extern __inline __m128d
3140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3141_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
3142{
3143 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
3144 (__v2df) __B,
3145 (__v2di) __C, __imm,
3146 (__mmask8) -1,
3147 _MM_FROUND_CUR_DIRECTION);
3148}
756c5857 3149
79fb4764
HJ
3150extern __inline __m128d
3151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3152_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
3153 __m128i __C, const int __imm)
3154{
3155 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
3156 (__v2df) __B,
3157 (__v2di) __C, __imm,
3158 (__mmask8) __U,
3159 _MM_FROUND_CUR_DIRECTION);
3160}
756c5857 3161
79fb4764
HJ
3162extern __inline __m128d
3163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
3165 __m128i __C, const int __imm)
3166{
3167 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
3168 (__v2df) __B,
3169 (__v2di) __C,
3170 __imm,
3171 (__mmask8) __U,
3172 _MM_FROUND_CUR_DIRECTION);
3173}
756c5857 3174
79fb4764
HJ
3175extern __inline __m128
3176__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
3178{
3179 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
3180 (__v4sf) __B,
3181 (__v4si) __C, __imm,
3182 (__mmask8) -1,
3183 _MM_FROUND_CUR_DIRECTION);
3184}
756c5857 3185
79fb4764
HJ
3186extern __inline __m128
3187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3188_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
3189 __m128i __C, const int __imm)
3190{
3191 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
3192 (__v4sf) __B,
3193 (__v4si) __C, __imm,
3194 (__mmask8) __U,
3195 _MM_FROUND_CUR_DIRECTION);
3196}
756c5857 3197
79fb4764
HJ
3198extern __inline __m128
3199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3200_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
3201 __m128i __C, const int __imm)
3202{
3203 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
3204 (__v4sf) __B,
3205 (__v4si) __C, __imm,
3206 (__mmask8) __U,
3207 _MM_FROUND_CUR_DIRECTION);
3208}
756c5857 3209
79fb4764
HJ
3210#else
/* Macro forms of the scalar fixupimm intrinsics, used when __OPTIMIZE__
   is not defined.  Each argument is cast to its intrinsic type and then
   to the builtin's vector type; the unmasked forms pass an all-ones
   mask, and all pass _MM_FROUND_CUR_DIRECTION.  */
#define _mm_fixupimm_sd(X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
      (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_fixupimm_ss(X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
      (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
  ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
756c5857 3240
756c5857
AI
3241#endif
3242
79fb4764
HJ
3243#ifdef __x86_64__
3244extern __inline unsigned long long
756c5857 3245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3246_mm_cvtss_u64 (__m128 __A)
756c5857 3247{
79fb4764
HJ
3248 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
3249 __A,
3250 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3251}
3252
79fb4764 3253extern __inline unsigned long long
756c5857 3254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3255_mm_cvttss_u64 (__m128 __A)
756c5857 3256{
79fb4764
HJ
3257 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
3258 __A,
3259 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3260}
3261
79fb4764 3262extern __inline long long
756c5857 3263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3264_mm_cvttss_i64 (__m128 __A)
756c5857 3265{
79fb4764
HJ
3266 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
3267 _MM_FROUND_CUR_DIRECTION);
756c5857 3268}
79fb4764 3269#endif /* __x86_64__ */
756c5857 3270
79fb4764 3271extern __inline unsigned
756c5857 3272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3273_mm_cvtss_u32 (__m128 __A)
756c5857 3274{
79fb4764
HJ
3275 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
3276 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3277}
3278
79fb4764 3279extern __inline unsigned
756c5857 3280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3281_mm_cvttss_u32 (__m128 __A)
756c5857 3282{
79fb4764
HJ
3283 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
3284 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3285}
3286
79fb4764 3287extern __inline int
756c5857 3288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3289_mm_cvttss_i32 (__m128 __A)
756c5857 3290{
79fb4764
HJ
3291 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
3292 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3293}
3294
79fb4764 3295extern __inline int
075691af 3296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3297_mm_cvtsd_i32 (__m128d __A)
075691af 3298{
79fb4764 3299 return (int) __builtin_ia32_cvtsd2si ((__v2df) __A);
158061a6
OM
3300}
3301
79fb4764 3302extern __inline int
158061a6 3303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3304_mm_cvtss_i32 (__m128 __A)
158061a6 3305{
79fb4764 3306 return (int) __builtin_ia32_cvtss2si ((__v4sf) __A);
158061a6
OM
3307}
3308
3309extern __inline __m128d
3310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3311_mm_cvti32_sd (__m128d __A, int __B)
158061a6 3312{
79fb4764 3313 return (__m128d) __builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
075691af
AI
3314}
3315
3316extern __inline __m128
3317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3318_mm_cvti32_ss (__m128 __A, int __B)
075691af 3319{
79fb4764 3320 return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
158061a6
OM
3321}
3322
79fb4764
HJ
3323#ifdef __x86_64__
3324extern __inline unsigned long long
158061a6 3325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3326_mm_cvtsd_u64 (__m128d __A)
158061a6 3327{
79fb4764
HJ
3328 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
3329 __A,
3330 _MM_FROUND_CUR_DIRECTION);
158061a6
OM
3331}
3332
79fb4764 3333extern __inline unsigned long long
158061a6 3334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3335_mm_cvttsd_u64 (__m128d __A)
158061a6 3336{
79fb4764
HJ
3337 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
3338 __A,
3339 _MM_FROUND_CUR_DIRECTION);
075691af 3340}
3c940d42 3341
79fb4764 3342extern __inline long long
756c5857 3343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3344_mm_cvttsd_i64 (__m128d __A)
756c5857 3345{
79fb4764
HJ
3346 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
3347 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3348}
3349
79fb4764 3350extern __inline long long
756c5857 3351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3352_mm_cvtsd_i64 (__m128d __A)
756c5857 3353{
79fb4764 3354 return (long long) __builtin_ia32_cvtsd2si64 ((__v2df) __A);
756c5857
AI
3355}
3356
79fb4764 3357extern __inline long long
756c5857 3358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3359_mm_cvtss_i64 (__m128 __A)
756c5857 3360{
79fb4764 3361 return (long long) __builtin_ia32_cvtss2si64 ((__v4sf) __A);
756c5857
AI
3362}
3363
79fb4764 3364extern __inline __m128d
756c5857 3365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3366_mm_cvti64_sd (__m128d __A, long long __B)
756c5857 3367{
79fb4764 3368 return (__m128d) __builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
756c5857
AI
3369}
3370
79fb4764 3371extern __inline __m128
756c5857 3372__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3373_mm_cvti64_ss (__m128 __A, long long __B)
756c5857 3374{
79fb4764 3375 return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
756c5857 3376}
79fb4764 3377#endif /* __x86_64__ */
756c5857 3378
79fb4764 3379extern __inline unsigned
756c5857 3380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3381_mm_cvtsd_u32 (__m128d __A)
756c5857 3382{
79fb4764
HJ
3383 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
3384 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3385}
3386
79fb4764 3387extern __inline unsigned
756c5857 3388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3389_mm_cvttsd_u32 (__m128d __A)
756c5857 3390{
79fb4764
HJ
3391 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
3392 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3393}
3394
79fb4764 3395extern __inline int
756c5857 3396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3397_mm_cvttsd_i32 (__m128d __A)
756c5857 3398{
79fb4764
HJ
3399 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
3400 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3401}
3402
79fb4764
HJ
3403#ifdef __OPTIMIZE__
3404extern __inline __m128
756c5857 3405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3406_mm_getexp_ss (__m128 __A, __m128 __B)
756c5857 3407{
79fb4764
HJ
3408 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
3409 (__v4sf) __B,
3410 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3411}
3412
79fb4764 3413extern __inline __m128
756c5857 3414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3415_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
756c5857 3416{
79fb4764
HJ
3417 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
3418 (__v4sf) __B,
3419 (__v4sf) __W,
3420 (__mmask8) __U,
3421 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3422}
3423
79fb4764 3424extern __inline __m128
756c5857 3425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3426_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
756c5857 3427{
79fb4764
HJ
3428 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
3429 (__v4sf) __B,
3430 (__v4sf)
fd79b414 3431 _mm_avx512_setzero_ps (),
79fb4764
HJ
3432 (__mmask8) __U,
3433 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3434}
3435
79fb4764 3436extern __inline __m128d
756c5857 3437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3438_mm_getexp_sd (__m128d __A, __m128d __B)
756c5857 3439{
79fb4764
HJ
3440 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
3441 (__v2df) __B,
3442 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3443}
3444
79fb4764 3445extern __inline __m128d
756c5857 3446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3447_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
756c5857 3448{
79fb4764
HJ
3449 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
3450 (__v2df) __B,
3451 (__v2df) __W,
3452 (__mmask8) __U,
3453 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3454}
3455
79fb4764 3456extern __inline __m128d
756c5857 3457__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3458_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
756c5857 3459{
79fb4764
HJ
3460 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
3461 (__v2df) __B,
3462 (__v2df)
fd79b414 3463 _mm_avx512_setzero_pd (),
79fb4764
HJ
3464 (__mmask8) __U,
3465 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3466}
3467
79fb4764 3468extern __inline __m128d
756c5857 3469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3470_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
3471 _MM_MANTISSA_SIGN_ENUM __D)
756c5857 3472{
79fb4764
HJ
3473 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
3474 (__v2df) __B,
3475 (__D << 2) | __C,
3476 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3477}
3478
79fb4764 3479extern __inline __m128d
756c5857 3480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3481_mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3482 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
756c5857 3483{
79fb4764
HJ
3484 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
3485 (__v2df) __B,
3486 (__D << 2) | __C,
3487 (__v2df) __W,
3488 __U,
3489 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3490}
3491
79fb4764 3492extern __inline __m128d
756c5857 3493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3494_mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
3495 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
756c5857 3496{
79fb4764
HJ
3497 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
3498 (__v2df) __B,
3499 (__D << 2) | __C,
3500 (__v2df)
fd79b414 3501 _mm_avx512_setzero_pd(),
79fb4764
HJ
3502 __U,
3503 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3504}
3505
79fb4764 3506extern __inline __m128
756c5857 3507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3508_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
3509 _MM_MANTISSA_SIGN_ENUM __D)
756c5857 3510{
79fb4764
HJ
3511 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
3512 (__v4sf) __B,
3513 (__D << 2) | __C,
3514 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3515}
3516
79fb4764 3517extern __inline __m128
756c5857 3518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3519_mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3520 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
756c5857 3521{
79fb4764
HJ
3522 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
3523 (__v4sf) __B,
3524 (__D << 2) | __C,
3525 (__v4sf) __W,
3526 __U,
3527 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3528}
3529
79fb4764 3530extern __inline __m128
756c5857 3531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3532_mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
3533 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
756c5857 3534{
79fb4764
HJ
3535 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
3536 (__v4sf) __B,
3537 (__D << 2) | __C,
3538 (__v4sf)
fd79b414 3539 _mm_avx512_setzero_ps(),
79fb4764
HJ
3540 __U,
3541 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3542}
3543
79fb4764 3544extern __inline __m128
756c5857 3545__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3546_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
756c5857 3547{
79fb4764
HJ
3548 return (__m128)
3549 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
3550 (__v4sf) __B, __imm,
3551 (__v4sf)
fd79b414 3552 _mm_avx512_setzero_ps (),
79fb4764
HJ
3553 (__mmask8) -1,
3554 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3555}
3556
756c5857 3557
79fb4764 3558extern __inline __m128
756c5857 3559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3560_mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
3561 const int __imm)
756c5857 3562{
79fb4764
HJ
3563 return (__m128)
3564 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
3565 (__v4sf) __D, __imm,
3566 (__v4sf) __A,
3567 (__mmask8) __B,
3568 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3569}
3570
79fb4764 3571extern __inline __m128
756c5857 3572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3573_mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
3574 const int __imm)
756c5857 3575{
79fb4764
HJ
3576 return (__m128)
3577 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
3578 (__v4sf) __C, __imm,
3579 (__v4sf)
fd79b414 3580 _mm_avx512_setzero_ps (),
79fb4764
HJ
3581 (__mmask8) __A,
3582 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3583}
3584
79fb4764 3585extern __inline __m128d
756c5857 3586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3587_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
756c5857 3588{
79fb4764
HJ
3589 return (__m128d)
3590 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
3591 (__v2df) __B, __imm,
3592 (__v2df)
fd79b414 3593 _mm_avx512_setzero_pd (),
79fb4764
HJ
3594 (__mmask8) -1,
3595 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3596}
3597
79fb4764 3598extern __inline __m128d
756c5857 3599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3600_mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
3601 const int __imm)
756c5857 3602{
79fb4764
HJ
3603 return (__m128d)
3604 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
3605 (__v2df) __D, __imm,
3606 (__v2df) __A,
3607 (__mmask8) __B,
3608 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3609}
3610
79fb4764 3611extern __inline __m128d
756c5857 3612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3613_mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
3614 const int __imm)
756c5857 3615{
79fb4764
HJ
3616 return (__m128d)
3617 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
3618 (__v2df) __C, __imm,
3619 (__v2df)
fd79b414 3620 _mm_avx512_setzero_pd (),
79fb4764
HJ
3621 (__mmask8) __A,
3622 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3623}
3624
79fb4764 3625extern __inline __mmask8
756c5857 3626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3627_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
756c5857 3628{
79fb4764
HJ
3629 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
3630 (__v2df) __Y, __P,
3631 (__mmask8) -1,
3632 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3633}
3634
79fb4764 3635extern __inline __mmask8
756c5857 3636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3637_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
756c5857 3638{
79fb4764
HJ
3639 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
3640 (__v2df) __Y, __P,
3641 (__mmask8) __M,
3642 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3643}
3644
79fb4764 3645extern __inline __mmask8
756c5857 3646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3647_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
756c5857 3648{
79fb4764
HJ
3649 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
3650 (__v4sf) __Y, __P,
3651 (__mmask8) -1,
3652 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3653}
3654
79fb4764 3655extern __inline __mmask8
756c5857 3656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3657_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
756c5857 3658{
79fb4764
HJ
3659 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
3660 (__v4sf) __Y, __P,
3661 (__mmask8) __M,
3662 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
3663}
3664
79fb4764
HJ
3665#else
/* Macro form used when __OPTIMIZE__ is not defined.  D and C are packed
   into one int immediate as ((D) << 2) | (C).  */
#define _mm_getmant_sd(X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_round ((__v2df) (__m128d) (X), \
                                             (__v2df) (__m128d) (Y), \
                                             (int) (((D) << 2) | (C)), \
                                             _MM_FROUND_CUR_DIRECTION))
756c5857 3671
79fb4764
HJ
/* Macro form used when __OPTIMIZE__ is not defined.  W is the pass-through
   vector, U the mask; D and C form the immediate ((D) << 2) | (C).  */
#define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) (__m128d) (X), \
                                                  (__v2df) (__m128d) (Y), \
                                                  (int) (((D) << 2) | (C)), \
                                                  (__v2df) (__m128d) (W), \
                                                  (__mmask8) (U), \
                                                  _MM_FROUND_CUR_DIRECTION))
756c5857 3679
79fb4764
HJ
/* Macro form used when __OPTIMIZE__ is not defined.  Like
   _mm_mask_getmant_sd with _mm_avx512_setzero_pd () as pass-through.  */
#define _mm_maskz_getmant_sd(U, X, Y, C, D) \
  ((__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) (__m128d) (X), \
                                                  (__v2df) (__m128d) (Y), \
                                                  (int) (((D) << 2) | (C)), \
                                                  (__v2df) _mm_avx512_setzero_pd (), \
                                                  (__mmask8) (U), \
                                                  _MM_FROUND_CUR_DIRECTION))
756c5857 3687
79fb4764
HJ
/* Macro form used when __OPTIMIZE__ is not defined.  D and C are packed
   into one int immediate as ((D) << 2) | (C).  */
#define _mm_getmant_ss(X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_round ((__v4sf) (__m128) (X), \
                                            (__v4sf) (__m128) (Y), \
                                            (int) (((D) << 2) | (C)), \
                                            _MM_FROUND_CUR_DIRECTION))
756c5857 3693
79fb4764
HJ
/* Macro form used when __OPTIMIZE__ is not defined.  W is the pass-through
   vector, U the mask; D and C form the immediate ((D) << 2) | (C).  */
#define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) (__m128) (X), \
                                                 (__v4sf) (__m128) (Y), \
                                                 (int) (((D) << 2) | (C)), \
                                                 (__v4sf) (__m128) (W), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))
756c5857 3701
79fb4764
HJ
/* Macro form used when __OPTIMIZE__ is not defined.  Like
   _mm_mask_getmant_ss with _mm_avx512_setzero_ps () as pass-through.  */
#define _mm_maskz_getmant_ss(U, X, Y, C, D) \
  ((__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) (__m128) (X), \
                                                 (__v4sf) (__m128) (Y), \
                                                 (int) (((D) << 2) | (C)), \
                                                 (__v4sf) _mm_avx512_setzero_ps (), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))
756c5857 3709
79fb4764
HJ
/* Macro form used when __OPTIMIZE__ is not defined.  */
#define _mm_getexp_ss(A, B) \
  ((__m128) __builtin_ia32_getexpss128_round ((__v4sf) (__m128) (A), \
                                              (__v4sf) (__m128) (B), \
                                              _MM_FROUND_CUR_DIRECTION))
756c5857 3713
79fb4764
HJ
/* Macro form used when __OPTIMIZE__ is not defined.  W is the pass-through
   vector and U the mask.  The expansion is fully parenthesised and every
   argument is parenthesised and cast, so the macro can be used inside a
   larger expression (e.g. followed by a subscript) and accepts the same
   operands as the inline-function form.  */
#define _mm_mask_getexp_ss(W, U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) (__m128) (A), \
                                                (__v4sf) (__m128) (B), \
                                                (__v4sf) (__m128) (W), \
                                                (__mmask8) (U), \
                                                _MM_FROUND_CUR_DIRECTION))
756c5857 3717
/* Macro form used when __OPTIMIZE__ is not defined.  The pass-through
   vector is _mm_avx512_setzero_ps () and U is the mask.  The expansion is
   fully parenthesised and the arguments are parenthesised and cast, so the
   macro is safe inside a larger expression.  */
#define _mm_maskz_getexp_ss(U, A, B) \
  ((__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) (__m128) (A), \
                                                (__v4sf) (__m128) (B), \
                                                (__v4sf) _mm_avx512_setzero_ps (), \
                                                (__mmask8) (U), \
                                                _MM_FROUND_CUR_DIRECTION))
3721
/* Macro form used when __OPTIMIZE__ is not defined.  */
#define _mm_getexp_sd(A, B) \
  ((__m128d) __builtin_ia32_getexpsd128_round ((__v2df) (__m128d) (A), \
                                               (__v2df) (__m128d) (B), \
                                               _MM_FROUND_CUR_DIRECTION))
3725
/* Macro form used when __OPTIMIZE__ is not defined.  W is the pass-through
   vector and U the mask.  The expansion is fully parenthesised and every
   argument is parenthesised and cast, so the macro can be used inside a
   larger expression (e.g. followed by a subscript) and accepts the same
   operands as the inline-function form.  */
#define _mm_mask_getexp_sd(W, U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) (__m128d) (A), \
                                                 (__v2df) (__m128d) (B), \
                                                 (__v2df) (__m128d) (W), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))
3729
/* Macro form used when __OPTIMIZE__ is not defined.  The pass-through
   vector is _mm_avx512_setzero_pd () and U is the mask.  The expansion is
   fully parenthesised and the arguments are parenthesised and cast, so the
   macro is safe inside a larger expression.  */
#define _mm_maskz_getexp_sd(U, A, B) \
  ((__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) (__m128d) (A), \
                                                 (__v2df) (__m128d) (B), \
                                                 (__v2df) _mm_avx512_setzero_pd (), \
                                                 (__mmask8) (U), \
                                                 _MM_FROUND_CUR_DIRECTION))
3733
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, all-ones mask, I as the int immediate.  */
#define _mm_roundscale_ss(A, B, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
                                                  (__v4sf) (__m128) (B), \
                                                  (int) (I), \
                                                  (__v4sf) _mm_avx512_setzero_ps (), \
                                                  (__mmask8) (-1), \
                                                  _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   operands, A the pass-through vector, U the mask, I the int immediate.  */
#define _mm_mask_roundscale_ss(A, U, B, C, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
                                                  (__v4sf) (__m128) (C), \
                                                  (int) (I), \
                                                  (__v4sf) (__m128) (A), \
                                                  (__mmask8) (U), \
                                                  _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U as the mask, I as the int immediate.  */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128) __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
                                                  (__v4sf) (__m128) (B), \
                                                  (int) (I), \
                                                  (__v4sf) _mm_avx512_setzero_ps (), \
                                                  (__mmask8) (U), \
                                                  _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, all-ones mask, I as the int immediate.  */
#define _mm_roundscale_sd(A, B, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
                                                   (__v2df) (__m128d) (B), \
                                                   (int) (I), \
                                                   (__v2df) _mm_avx512_setzero_pd (), \
                                                   (__mmask8) (-1), \
                                                   _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: B and C are the
   operands, A the pass-through vector, U the mask, I the int immediate.  */
#define _mm_mask_roundscale_sd(A, U, B, C, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
                                                   (__v2df) (__m128d) (C), \
                                                   (int) (I), \
                                                   (__v2df) (__m128d) (A), \
                                                   (__mmask8) (U), \
                                                   _MM_FROUND_CUR_DIRECTION))
/* Macro form used when __OPTIMIZE__ is not defined: zero pass-through
   vector, U as the mask, I as the int immediate.  */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d) __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
                                                   (__v2df) (__m128d) (B), \
                                                   (int) (I), \
                                                   (__v2df) _mm_avx512_setzero_pd (), \
                                                   (__mmask8) (U), \
                                                   _MM_FROUND_CUR_DIRECTION))
3782
/* Macro form used when __OPTIMIZE__ is not defined: P is the int
   predicate immediate and the input mask is all ones.  */
#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
3787
/* Macro form used when __OPTIMIZE__ is not defined: P is the int
   predicate immediate and M the input mask.  M is parenthesised and cast
   to __mmask8, matching the inline-function form.  */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) (__m128d) (X), \
                                         (__v2df) (__m128d) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
3792
/* Macro form used when __OPTIMIZE__ is not defined: P is the int
   predicate immediate and the input mask is all ones.  */
#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) -1, \
                                         _MM_FROUND_CUR_DIRECTION))
3797
/* Macro form used when __OPTIMIZE__ is not defined: P is the int
   predicate immediate and M the input mask.  M is parenthesised and cast
   to __mmask8, matching the inline-function form.  */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) (__m128) (X), \
                                         (__v4sf) (__m128) (Y), \
                                         (int) (P), \
                                         (__mmask8) (M), \
                                         _MM_FROUND_CUR_DIRECTION))
3802
3803#endif
3804
3805#ifdef __DISABLE_AVX512F__
3806#undef __DISABLE_AVX512F__
3807#pragma GCC pop_options
3808#endif /* __DISABLE_AVX512F__ */
3809
3810#if !defined (__AVX512F__) || !defined (__EVEX512__)
3811#pragma GCC push_options
3812#pragma GCC target("avx512f,evex512")
3813#define __DISABLE_AVX512F_512__
3814#endif /* __AVX512F_512__ */
3815
/* Internal 64-byte vector types used to implement the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* Public 64-byte vector types.  The Intel API is flexible enough that
   these must be allowed to alias other vector types and their scalar
   components, hence __may_alias__.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* The same types with alignment relaxed to 1 byte.  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
3838
3839extern __inline __mmask16
756c5857 3840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3841_mm512_int2mask (int __M)
756c5857 3842{
79fb4764 3843 return (__mmask16) __M;
756c5857
AI
3844}
3845
79fb4764 3846extern __inline int
756c5857 3847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 3848_mm512_mask2int (__mmask16 __M)
756c5857 3849{
79fb4764 3850 return (int) __M;
756c5857
AI
3851}
3852
79fb4764 3853extern __inline __m512i
756c5857 3854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3855_mm512_set_epi64 (long long __A, long long __B, long long __C,
3856 long long __D, long long __E, long long __F,
3857 long long __G, long long __H)
756c5857 3858{
79fb4764
HJ
3859 return __extension__ (__m512i) (__v8di)
3860 { __H, __G, __F, __E, __D, __C, __B, __A };
756c5857
AI
3861}
3862
79fb4764
HJ
3863/* Create the vector [A B C D E F G H I J K L M N O P]. */
3864extern __inline __m512i
756c5857 3865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3866_mm512_set_epi32 (int __A, int __B, int __C, int __D,
3867 int __E, int __F, int __G, int __H,
3868 int __I, int __J, int __K, int __L,
3869 int __M, int __N, int __O, int __P)
756c5857 3870{
79fb4764
HJ
3871 return __extension__ (__m512i)(__v16si)
3872 { __P, __O, __N, __M, __L, __K, __J, __I,
3873 __H, __G, __F, __E, __D, __C, __B, __A };
756c5857
AI
3874}
3875
79fb4764
HJ
3876extern __inline __m512i
3877__attribute__((__gnu_inline__, __always_inline__, __artificial__))
3878_mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
3879 short __q27, short __q26, short __q25, short __q24,
3880 short __q23, short __q22, short __q21, short __q20,
3881 short __q19, short __q18, short __q17, short __q16,
3882 short __q15, short __q14, short __q13, short __q12,
3883 short __q11, short __q10, short __q09, short __q08,
3884 short __q07, short __q06, short __q05, short __q04,
3885 short __q03, short __q02, short __q01, short __q00)
756c5857 3886{
79fb4764
HJ
3887 return __extension__ (__m512i)(__v32hi){
3888 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
3889 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
3890 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
3891 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
3892 };
756c5857
AI
3893}
3894
79fb4764
HJ
3895extern __inline __m512i
3896__attribute__((__gnu_inline__, __always_inline__, __artificial__))
3897_mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
3898 char __q59, char __q58, char __q57, char __q56,
3899 char __q55, char __q54, char __q53, char __q52,
3900 char __q51, char __q50, char __q49, char __q48,
3901 char __q47, char __q46, char __q45, char __q44,
3902 char __q43, char __q42, char __q41, char __q40,
3903 char __q39, char __q38, char __q37, char __q36,
3904 char __q35, char __q34, char __q33, char __q32,
3905 char __q31, char __q30, char __q29, char __q28,
3906 char __q27, char __q26, char __q25, char __q24,
3907 char __q23, char __q22, char __q21, char __q20,
3908 char __q19, char __q18, char __q17, char __q16,
3909 char __q15, char __q14, char __q13, char __q12,
3910 char __q11, char __q10, char __q09, char __q08,
3911 char __q07, char __q06, char __q05, char __q04,
3912 char __q03, char __q02, char __q01, char __q00)
756c5857 3913{
79fb4764
HJ
3914 return __extension__ (__m512i)(__v64qi){
3915 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
3916 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
3917 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
3918 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
3919 __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
3920 __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
3921 __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
3922 __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
3923 };
756c5857
AI
3924}
3925
79fb4764 3926extern __inline __m512d
756c5857 3927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3928_mm512_set_pd (double __A, double __B, double __C, double __D,
3929 double __E, double __F, double __G, double __H)
756c5857 3930{
79fb4764
HJ
3931 return __extension__ (__m512d)
3932 { __H, __G, __F, __E, __D, __C, __B, __A };
756c5857
AI
3933}
3934
3935extern __inline __m512
3936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
3937_mm512_set_ps (float __A, float __B, float __C, float __D,
3938 float __E, float __F, float __G, float __H,
3939 float __I, float __J, float __K, float __L,
3940 float __M, float __N, float __O, float __P)
756c5857 3941{
79fb4764
HJ
3942 return __extension__ (__m512)
3943 { __P, __O, __N, __M, __L, __K, __J, __I,
3944 __H, __G, __F, __E, __D, __C, __B, __A };
756c5857 3945}
756c5857 3946
79fb4764
HJ
/* The _mm512_setr_* macros forward to the matching _mm512_set_* function
   with the argument list reversed.  */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_epi64 (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7, \
                          e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_epi32 (e15, e14, e13, e12, e11, e10, e9, e8, \
                    e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_pd (e7, e6, e5, e4, e3, e2, e1, e0)

#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7, \
                       e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_ps (e15, e14, e13, e12, e11, e10, e9, e8, \
                 e7, e6, e5, e4, e3, e2, e1, e0)
756c5857 3959
79fb4764
HJ
3960extern __inline __m512
3961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962_mm512_undefined_ps (void)
3963{
3964#pragma GCC diagnostic push
3965#pragma GCC diagnostic ignored "-Winit-self"
3966 __m512 __Y = __Y;
3967#pragma GCC diagnostic pop
3968 return __Y;
3969}
756c5857 3970
79fb4764 3971#define _mm512_undefined _mm512_undefined_ps
756c5857 3972
79fb4764
HJ
3973extern __inline __m512d
3974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3975_mm512_undefined_pd (void)
3976{
3977#pragma GCC diagnostic push
3978#pragma GCC diagnostic ignored "-Winit-self"
3979 __m512d __Y = __Y;
3980#pragma GCC diagnostic pop
3981 return __Y;
3982}
756c5857 3983
79fb4764
HJ
3984extern __inline __m512i
3985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3986_mm512_undefined_epi32 (void)
3987{
3988#pragma GCC diagnostic push
3989#pragma GCC diagnostic ignored "-Winit-self"
3990 __m512i __Y = __Y;
3991#pragma GCC diagnostic pop
3992 return __Y;
3993}
756c5857 3994
79fb4764 3995#define _mm512_undefined_si512 _mm512_undefined_epi32
756c5857 3996
79fb4764
HJ
3997extern __inline __m512i
3998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999_mm512_set1_epi8 (char __A)
4000{
4001 return __extension__ (__m512i)(__v64qi)
4002 { __A, __A, __A, __A, __A, __A, __A, __A,
4003 __A, __A, __A, __A, __A, __A, __A, __A,
4004 __A, __A, __A, __A, __A, __A, __A, __A,
4005 __A, __A, __A, __A, __A, __A, __A, __A,
4006 __A, __A, __A, __A, __A, __A, __A, __A,
4007 __A, __A, __A, __A, __A, __A, __A, __A,
4008 __A, __A, __A, __A, __A, __A, __A, __A,
4009 __A, __A, __A, __A, __A, __A, __A, __A };
4010}
756c5857 4011
79fb4764
HJ
4012extern __inline __m512i
4013__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4014_mm512_set1_epi16 (short __A)
4015{
4016 return __extension__ (__m512i)(__v32hi)
4017 { __A, __A, __A, __A, __A, __A, __A, __A,
4018 __A, __A, __A, __A, __A, __A, __A, __A,
4019 __A, __A, __A, __A, __A, __A, __A, __A,
4020 __A, __A, __A, __A, __A, __A, __A, __A };
4021}
756c5857 4022
79fb4764
HJ
4023extern __inline __m512d
4024__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4025_mm512_set1_pd (double __A)
4026{
4027 return __extension__ (__m512d)(__v8df)
4028 { __A, __A, __A, __A, __A, __A, __A, __A };
4029}
756c5857 4030
79fb4764
HJ
4031extern __inline __m512
4032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033_mm512_set1_ps (float __A)
4034{
4035 return __extension__ (__m512)(__v16sf)
4036 { __A, __A, __A, __A, __A, __A, __A, __A,
4037 __A, __A, __A, __A, __A, __A, __A, __A };
4038}
756c5857 4039
79fb4764
HJ
4040/* Create the vector [A B C D A B C D A B C D A B C D]. */
4041extern __inline __m512i
4042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4043_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
4044{
4045 return __extension__ (__m512i)(__v16si)
4046 { __D, __C, __B, __A, __D, __C, __B, __A,
4047 __D, __C, __B, __A, __D, __C, __B, __A };
4048}
756c5857 4049
79fb4764
HJ
4050extern __inline __m512i
4051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4052_mm512_set4_epi64 (long long __A, long long __B, long long __C,
4053 long long __D)
4054{
4055 return __extension__ (__m512i) (__v8di)
4056 { __D, __C, __B, __A, __D, __C, __B, __A };
4057}
756c5857 4058
79fb4764
HJ
4059extern __inline __m512d
4060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4061_mm512_set4_pd (double __A, double __B, double __C, double __D)
4062{
4063 return __extension__ (__m512d)
4064 { __D, __C, __B, __A, __D, __C, __B, __A };
4065}
756c5857 4066
79fb4764
HJ
4067extern __inline __m512
4068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4069_mm512_set4_ps (float __A, float __B, float __C, float __D)
4070{
4071 return __extension__ (__m512)
4072 { __D, __C, __B, __A, __D, __C, __B, __A,
4073 __D, __C, __B, __A, __D, __C, __B, __A };
4074}
756c5857 4075
79fb4764
HJ
/* The _mm512_setr4_* macros forward to the matching _mm512_set4_* function
   with the four arguments reversed.  */
#define _mm512_setr4_epi64(e0, e1, e2, e3) \
  _mm512_set4_epi64 (e3, e2, e1, e0)

#define _mm512_setr4_epi32(e0, e1, e2, e3) \
  _mm512_set4_epi32 (e3, e2, e1, e0)

#define _mm512_setr4_pd(e0, e1, e2, e3) \
  _mm512_set4_pd (e3, e2, e1, e0)

#define _mm512_setr4_ps(e0, e1, e2, e3) \
  _mm512_set4_ps (e3, e2, e1, e0)
756c5857 4087
79fb4764
HJ
4088extern __inline __m512
4089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4090_mm512_setzero_ps (void)
4091{
4092 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
4093 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
4094}
756c5857 4095
79fb4764
HJ
4096extern __inline __m512
4097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4098_mm512_setzero (void)
4099{
4100 return _mm512_setzero_ps ();
4101}
756c5857 4102
79fb4764
HJ
4103extern __inline __m512d
4104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4105_mm512_setzero_pd (void)
4106{
4107 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
4108}
756c5857 4109
79fb4764
HJ
4110extern __inline __m512i
4111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4112_mm512_setzero_epi32 (void)
4113{
4114 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
4115}
756c5857 4116
79fb4764
HJ
4117extern __inline __m512i
4118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4119_mm512_setzero_si512 (void)
4120{
4121 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
4122}
756c5857 4123
79fb4764
HJ
4124extern __inline __m512d
4125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4126_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
4127{
4128 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
4129 (__v8df) __W,
4130 (__mmask8) __U);
4131}
756c5857 4132
79fb4764
HJ
4133extern __inline __m512d
4134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4135_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
4136{
4137 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
4138 (__v8df)
4139 _mm512_setzero_pd (),
4140 (__mmask8) __U);
4141}
756c5857 4142
79fb4764 4143extern __inline __m512
756c5857 4144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4145_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
756c5857 4146{
79fb4764
HJ
4147 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
4148 (__v16sf) __W,
4149 (__mmask16) __U);
756c5857
AI
4150}
4151
79fb4764 4152extern __inline __m512
756c5857 4153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4154_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
756c5857 4155{
79fb4764
HJ
4156 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
4157 (__v16sf)
4158 _mm512_setzero_ps (),
4159 (__mmask16) __U);
756c5857
AI
4160}
4161
79fb4764 4162extern __inline __m512d
756c5857 4163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4164_mm512_load_pd (void const *__P)
756c5857 4165{
79fb4764 4166 return *(__m512d *) __P;
756c5857
AI
4167}
4168
79fb4764 4169extern __inline __m512d
756c5857 4170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4171_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
756c5857 4172{
79fb4764
HJ
4173 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4174 (__v8df) __W,
4175 (__mmask8) __U);
756c5857
AI
4176}
4177
79fb4764 4178extern __inline __m512d
756c5857 4179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4180_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
756c5857 4181{
79fb4764
HJ
4182 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4183 (__v8df)
4184 _mm512_setzero_pd (),
4185 (__mmask8) __U);
4186}
4187
4188extern __inline void
4189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4190_mm512_store_pd (void *__P, __m512d __A)
4191{
4192 *(__m512d *) __P = __A;
756c5857
AI
4193}
4194
79fb4764 4195extern __inline void
756c5857 4196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4197_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
756c5857 4198{
79fb4764
HJ
4199 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
4200 (__mmask8) __U);
756c5857
AI
4201}
4202
4203extern __inline __m512
4204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4205_mm512_load_ps (void const *__P)
756c5857 4206{
79fb4764 4207 return *(__m512 *) __P;
756c5857
AI
4208}
4209
4210extern __inline __m512
4211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4212_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
756c5857 4213{
79fb4764
HJ
4214 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4215 (__v16sf) __W,
4216 (__mmask16) __U);
756c5857
AI
4217}
4218
4219extern __inline __m512
4220__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4221_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
756c5857 4222{
79fb4764
HJ
4223 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4224 (__v16sf)
4225 _mm512_setzero_ps (),
4226 (__mmask16) __U);
756c5857
AI
4227}
4228
79fb4764 4229extern __inline void
756c5857 4230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4231_mm512_store_ps (void *__P, __m512 __A)
756c5857 4232{
79fb4764 4233 *(__m512 *) __P = __A;
756c5857
AI
4234}
4235
79fb4764 4236extern __inline void
756c5857 4237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4238_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
756c5857 4239{
79fb4764
HJ
4240 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
4241 (__mmask16) __U);
756c5857
AI
4242}
4243
79fb4764 4244extern __inline __m512i
756c5857 4245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4246_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
756c5857 4247{
79fb4764
HJ
4248 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
4249 (__v8di) __W,
4250 (__mmask8) __U);
756c5857
AI
4251}
4252
4253extern __inline __m512i
4254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4255_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
756c5857 4256{
79fb4764
HJ
4257 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
4258 (__v8di)
4259 _mm512_setzero_si512 (),
4260 (__mmask8) __U);
756c5857
AI
4261}
4262
4263extern __inline __m512i
4264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4265_mm512_load_epi64 (void const *__P)
756c5857 4266{
79fb4764 4267 return *(__m512i *) __P;
756c5857
AI
4268}
4269
4270extern __inline __m512i
4271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4272_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
756c5857 4273{
79fb4764
HJ
4274 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4275 (__v8di) __W,
4276 (__mmask8) __U);
756c5857
AI
4277}
4278
4279extern __inline __m512i
4280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4281_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
756c5857 4282{
79fb4764
HJ
4283 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
4284 (__v8di)
4285 _mm512_setzero_si512 (),
4286 (__mmask8) __U);
756c5857
AI
4287}
4288
79fb4764 4289extern __inline void
756c5857 4290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4291_mm512_store_epi64 (void *__P, __m512i __A)
756c5857 4292{
79fb4764 4293 *(__m512i *) __P = __A;
756c5857
AI
4294}
4295
79fb4764 4296extern __inline void
756c5857 4297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4298_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
756c5857 4299{
79fb4764
HJ
4300 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
4301 (__mmask8) __U);
756c5857
AI
4302}
4303
4304extern __inline __m512i
4305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4306_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
756c5857 4307{
79fb4764
HJ
4308 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
4309 (__v16si) __W,
4310 (__mmask16) __U);
756c5857
AI
4311}
4312
4313extern __inline __m512i
4314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4315_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
756c5857 4316{
79fb4764
HJ
4317 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
4318 (__v16si)
4319 _mm512_setzero_si512 (),
4320 (__mmask16) __U);
756c5857
AI
4321}
4322
4323extern __inline __m512i
4324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4325_mm512_load_si512 (void const *__P)
756c5857 4326{
79fb4764 4327 return *(__m512i *) __P;
756c5857
AI
4328}
4329
4330extern __inline __m512i
4331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4332_mm512_load_epi32 (void const *__P)
756c5857 4333{
79fb4764 4334 return *(__m512i *) __P;
756c5857
AI
4335}
4336
4337extern __inline __m512i
4338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4339_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
756c5857 4340{
79fb4764
HJ
4341 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4342 (__v16si) __W,
4343 (__mmask16) __U);
756c5857
AI
4344}
4345
4346extern __inline __m512i
4347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4348_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
756c5857 4349{
79fb4764
HJ
4350 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
4351 (__v16si)
4352 _mm512_setzero_si512 (),
4353 (__mmask16) __U);
756c5857
AI
4354}
4355
79fb4764 4356extern __inline void
756c5857 4357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4358_mm512_store_si512 (void *__P, __m512i __A)
756c5857 4359{
79fb4764 4360 *(__m512i *) __P = __A;
756c5857
AI
4361}
4362
79fb4764 4363extern __inline void
756c5857 4364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4365_mm512_store_epi32 (void *__P, __m512i __A)
756c5857 4366{
79fb4764 4367 *(__m512i *) __P = __A;
756c5857
AI
4368}
4369
79fb4764 4370extern __inline void
756c5857 4371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4372_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
756c5857 4373{
79fb4764
HJ
4374 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
4375 (__mmask16) __U);
756c5857
AI
4376}
4377
4378extern __inline __m512i
4379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4380_mm512_mullo_epi32 (__m512i __A, __m512i __B)
756c5857 4381{
79fb4764 4382 return (__m512i) ((__v16su) __A * (__v16su) __B);
756c5857
AI
4383}
4384
4385extern __inline __m512i
4386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4387_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
756c5857 4388{
79fb4764
HJ
4389 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
4390 (__v16si) __B,
4391 (__v16si)
4392 _mm512_setzero_si512 (),
4393 __M);
756c5857
AI
4394}
4395
4396extern __inline __m512i
4397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4398_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
756c5857 4399{
79fb4764
HJ
4400 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
4401 (__v16si) __B,
4402 (__v16si) __W, __M);
756c5857
AI
4403}
4404
79fb4764 4405extern __inline __m512i
756c5857 4406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4407_mm512_mullox_epi64 (__m512i __A, __m512i __B)
756c5857 4408{
79fb4764 4409 return (__m512i) ((__v8du) __A * (__v8du) __B);
756c5857
AI
4410}
4411
79fb4764 4412extern __inline __m512i
756c5857 4413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4414_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
756c5857 4415{
79fb4764 4416 return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
756c5857
AI
4417}
4418
79fb4764 4419extern __inline __m512i
756c5857 4420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4421_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
756c5857 4422{
79fb4764
HJ
4423 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
4424 (__v16si) __Y,
4425 (__v16si)
4426 _mm512_undefined_epi32 (),
4427 (__mmask16) -1);
4428}
4429
4430extern __inline __m512i
4431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4432_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
4433{
4434 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
4435 (__v16si) __Y,
4436 (__v16si) __W,
4437 (__mmask16) __U);
4438}
4439
4440extern __inline __m512i
4441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4442_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
4443{
4444 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
4445 (__v16si) __Y,
4446 (__v16si)
4447 _mm512_setzero_si512 (),
4448 (__mmask16) __U);
756c5857
AI
4449}
4450
4451extern __inline __m512i
4452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4453_mm512_srav_epi32 (__m512i __X, __m512i __Y)
756c5857 4454{
79fb4764
HJ
4455 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
4456 (__v16si) __Y,
4457 (__v16si)
4458 _mm512_undefined_epi32 (),
4459 (__mmask16) -1);
756c5857
AI
4460}
4461
4462extern __inline __m512i
4463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4464_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
756c5857 4465{
79fb4764
HJ
4466 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
4467 (__v16si) __Y,
4468 (__v16si) __W,
4469 (__mmask16) __U);
756c5857
AI
4470}
4471
4472extern __inline __m512i
4473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4474_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
756c5857 4475{
79fb4764
HJ
4476 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
4477 (__v16si) __Y,
4478 (__v16si)
4479 _mm512_setzero_si512 (),
4480 (__mmask16) __U);
756c5857
AI
4481}
4482
756c5857
AI
4483extern __inline __m512i
4484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4485_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
756c5857 4486{
79fb4764
HJ
4487 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
4488 (__v16si) __Y,
756c5857 4489 (__v16si)
4271e5cb 4490 _mm512_undefined_epi32 (),
756c5857
AI
4491 (__mmask16) -1);
4492}
4493
4494extern __inline __m512i
4495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4496_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
756c5857 4497{
79fb4764
HJ
4498 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
4499 (__v16si) __Y,
756c5857
AI
4500 (__v16si) __W,
4501 (__mmask16) __U);
4502}
4503
4504extern __inline __m512i
4505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4506_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
756c5857 4507{
79fb4764
HJ
4508 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
4509 (__v16si) __Y,
756c5857
AI
4510 (__v16si)
4511 _mm512_setzero_si512 (),
4512 (__mmask16) __U);
4513}
4514
4515extern __inline __m512i
4516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4517_mm512_add_epi64 (__m512i __A, __m512i __B)
756c5857 4518{
79fb4764 4519 return (__m512i) ((__v8du) __A + (__v8du) __B);
756c5857
AI
4520}
4521
4522extern __inline __m512i
4523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4524_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857 4525{
79fb4764
HJ
4526 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
4527 (__v8di) __B,
4528 (__v8di) __W,
4529 (__mmask8) __U);
756c5857
AI
4530}
4531
4532extern __inline __m512i
4533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4534_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 4535{
79fb4764
HJ
4536 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
4537 (__v8di) __B,
4538 (__v8di)
4539 _mm512_setzero_si512 (),
4540 (__mmask8) __U);
756c5857
AI
4541}
4542
4543extern __inline __m512i
4544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4545_mm512_sub_epi64 (__m512i __A, __m512i __B)
756c5857 4546{
79fb4764 4547 return (__m512i) ((__v8du) __A - (__v8du) __B);
756c5857
AI
4548}
4549
4550extern __inline __m512i
4551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4552_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857 4553{
79fb4764
HJ
4554 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
4555 (__v8di) __B,
4556 (__v8di) __W,
4557 (__mmask8) __U);
756c5857
AI
4558}
4559
4560extern __inline __m512i
4561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4562_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 4563{
79fb4764
HJ
4564 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
4565 (__v8di) __B,
4566 (__v8di)
4567 _mm512_setzero_si512 (),
4568 (__mmask8) __U);
756c5857
AI
4569}
4570
79fb4764 4571extern __inline __m512i
756c5857 4572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4573_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
756c5857 4574{
79fb4764
HJ
4575 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4576 (__v8di) __Y,
4577 (__v8di)
4578 _mm512_undefined_pd (),
4579 (__mmask8) -1);
756c5857
AI
4580}
4581
79fb4764 4582extern __inline __m512i
756c5857 4583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4584_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
756c5857 4585{
79fb4764
HJ
4586 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4587 (__v8di) __Y,
4588 (__v8di) __W,
4589 (__mmask8) __U);
756c5857
AI
4590}
4591
79fb4764 4592extern __inline __m512i
756c5857 4593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4594_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
756c5857 4595{
79fb4764
HJ
4596 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4597 (__v8di) __Y,
4598 (__v8di)
4599 _mm512_setzero_si512 (),
4600 (__mmask8) __U);
756c5857
AI
4601}
4602
79fb4764 4603extern __inline __m512i
756c5857 4604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4605_mm512_srav_epi64 (__m512i __X, __m512i __Y)
756c5857 4606{
79fb4764
HJ
4607 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
4608 (__v8di) __Y,
4609 (__v8di)
4610 _mm512_undefined_epi32 (),
4611 (__mmask8) -1);
756c5857
AI
4612}
4613
79fb4764 4614extern __inline __m512i
756c5857 4615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4616_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
756c5857 4617{
79fb4764
HJ
4618 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
4619 (__v8di) __Y,
4620 (__v8di) __W,
4621 (__mmask8) __U);
756c5857
AI
4622}
4623
79fb4764 4624extern __inline __m512i
756c5857 4625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4626_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
756c5857 4627{
79fb4764
HJ
4628 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
4629 (__v8di) __Y,
4630 (__v8di)
4631 _mm512_setzero_si512 (),
4632 (__mmask8) __U);
756c5857
AI
4633}
4634
756c5857
AI
4635extern __inline __m512i
4636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4637_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
756c5857 4638{
79fb4764
HJ
4639 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
4640 (__v8di) __Y,
4641 (__v8di)
4642 _mm512_undefined_epi32 (),
4643 (__mmask8) -1);
756c5857
AI
4644}
4645
4646extern __inline __m512i
4647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4648_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
756c5857 4649{
79fb4764
HJ
4650 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
4651 (__v8di) __Y,
4652 (__v8di) __W,
4653 (__mmask8) __U);
756c5857
AI
4654}
4655
4656extern __inline __m512i
4657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4658_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
756c5857 4659{
79fb4764
HJ
4660 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
4661 (__v8di) __Y,
4662 (__v8di)
4663 _mm512_setzero_si512 (),
4664 (__mmask8) __U);
756c5857
AI
4665}
4666
4667extern __inline __m512i
4668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4669_mm512_add_epi32 (__m512i __A, __m512i __B)
756c5857 4670{
79fb4764 4671 return (__m512i) ((__v16su) __A + (__v16su) __B);
756c5857
AI
4672}
4673
4674extern __inline __m512i
4675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4676_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
756c5857 4677{
79fb4764
HJ
4678 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
4679 (__v16si) __B,
4680 (__v16si) __W,
4681 (__mmask16) __U);
756c5857
AI
4682}
4683
4684extern __inline __m512i
4685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4686_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 4687{
79fb4764
HJ
4688 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
4689 (__v16si) __B,
4690 (__v16si)
4691 _mm512_setzero_si512 (),
4692 (__mmask16) __U);
756c5857
AI
4693}
4694
4695extern __inline __m512i
4696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4697_mm512_mul_epi32 (__m512i __X, __m512i __Y)
756c5857 4698{
79fb4764
HJ
4699 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
4700 (__v16si) __Y,
756c5857 4701 (__v8di)
4271e5cb 4702 _mm512_undefined_epi32 (),
756c5857
AI
4703 (__mmask8) -1);
4704}
4705
4706extern __inline __m512i
4707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4708_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
756c5857 4709{
79fb4764
HJ
4710 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
4711 (__v16si) __Y,
4712 (__v8di) __W, __M);
756c5857
AI
4713}
4714
4715extern __inline __m512i
4716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4717_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
756c5857 4718{
79fb4764
HJ
4719 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
4720 (__v16si) __Y,
756c5857
AI
4721 (__v8di)
4722 _mm512_setzero_si512 (),
79fb4764 4723 __M);
756c5857
AI
4724}
4725
4726extern __inline __m512i
4727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4728_mm512_sub_epi32 (__m512i __A, __m512i __B)
756c5857 4729{
79fb4764 4730 return (__m512i) ((__v16su) __A - (__v16su) __B);
756c5857
AI
4731}
4732
4733extern __inline __m512i
4734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4735_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
756c5857 4736{
79fb4764
HJ
4737 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
4738 (__v16si) __B,
4739 (__v16si) __W,
4740 (__mmask16) __U);
756c5857
AI
4741}
4742
4743extern __inline __m512i
4744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4745_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 4746{
79fb4764
HJ
4747 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
4748 (__v16si) __B,
4749 (__v16si)
4750 _mm512_setzero_si512 (),
4751 (__mmask16) __U);
756c5857
AI
4752}
4753
79fb4764 4754extern __inline __m512i
756c5857 4755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
4756_mm512_mul_epu32 (__m512i __X, __m512i __Y)
4757{
4758 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
4759 (__v16si) __Y,
4760 (__v8di)
4761 _mm512_undefined_epi32 (),
4762 (__mmask8) -1);
756c5857
AI
4763}
4764
79fb4764 4765extern __inline __m512i
756c5857 4766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4767_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
756c5857 4768{
79fb4764
HJ
4769 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
4770 (__v16si) __Y,
4771 (__v8di) __W, __M);
756c5857
AI
4772}
4773
79fb4764 4774extern __inline __m512i
756c5857 4775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4776_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
756c5857 4777{
79fb4764
HJ
4778 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
4779 (__v16si) __Y,
4780 (__v8di)
4781 _mm512_setzero_si512 (),
4782 __M);
756c5857
AI
4783}
4784
79fb4764
HJ
4785#ifdef __OPTIMIZE__
4786extern __inline __m512i
756c5857 4787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4788_mm512_slli_epi64 (__m512i __A, unsigned int __B)
756c5857 4789{
79fb4764
HJ
4790 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
4791 (__v8di)
4792 _mm512_undefined_epi32 (),
4793 (__mmask8) -1);
756c5857
AI
4794}
4795
79fb4764 4796extern __inline __m512i
756c5857 4797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
4798_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
4799 unsigned int __B)
756c5857 4800{
79fb4764
HJ
4801 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
4802 (__v8di) __W,
4803 (__mmask8) __U);
756c5857
AI
4804}
4805
79fb4764 4806extern __inline __m512i
756c5857 4807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4808_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
756c5857 4809{
79fb4764
HJ
4810 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
4811 (__v8di)
4812 _mm512_setzero_si512 (),
4813 (__mmask8) __U);
756c5857
AI
4814}
4815#else
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Unmasked: passes
   an undefined vector and an all-ones mask to the builtin.  */
#define _mm512_slli_epi64(X, C) \
  ((__m512i) \
   __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (unsigned int)(C), \
                                  (__v8di)(__m512i)_mm512_undefined_epi32 (), \
                                  (__mmask8)-1))
756c5857 4821
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Passes W as the
   third operand and U as the mask to the builtin.  */
#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) \
   __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (unsigned int)(C), \
                                  (__v8di)(__m512i)(W), \
                                  (__mmask8)(U)))
756c5857 4827
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Passes the result
   of _mm512_setzero_si512 () as the third operand and U as the mask.  */
#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) \
   __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (unsigned int)(C), \
                                  (__v8di)(__m512i)_mm512_setzero_si512 (), \
                                  (__mmask8)(U)))
756c5857
AI
4833#endif
4834
79fb4764 4835extern __inline __m512i
756c5857 4836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4837_mm512_sll_epi64 (__m512i __A, __m128i __B)
756c5857 4838{
79fb4764
HJ
4839 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
4840 (__v2di) __B,
4841 (__v8di)
4842 _mm512_undefined_epi32 (),
4843 (__mmask8) -1);
756c5857
AI
4844}
4845
79fb4764 4846extern __inline __m512i
756c5857 4847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4848_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
756c5857 4849{
79fb4764
HJ
4850 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
4851 (__v2di) __B,
4852 (__v8di) __W,
4853 (__mmask8) __U);
756c5857
AI
4854}
4855
79fb4764 4856extern __inline __m512i
756c5857 4857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4858_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
756c5857 4859{
79fb4764
HJ
4860 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
4861 (__v2di) __B,
4862 (__v8di)
4863 _mm512_setzero_si512 (),
4864 (__mmask8) __U);
756c5857
AI
4865}
4866
79fb4764
HJ
4867#ifdef __OPTIMIZE__
4868extern __inline __m512i
756c5857 4869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4870_mm512_srli_epi64 (__m512i __A, unsigned int __B)
756c5857 4871{
79fb4764
HJ
4872 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
4873 (__v8di)
4874 _mm512_undefined_epi32 (),
4875 (__mmask8) -1);
756c5857
AI
4876}
4877
79fb4764 4878extern __inline __m512i
756c5857 4879__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
4880_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
4881 __m512i __A, unsigned int __B)
756c5857 4882{
79fb4764
HJ
4883 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
4884 (__v8di) __W,
4885 (__mmask8) __U);
756c5857
AI
4886}
4887
79fb4764 4888extern __inline __m512i
756c5857 4889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4890_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
756c5857 4891{
79fb4764
HJ
4892 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
4893 (__v8di)
4894 _mm512_setzero_si512 (),
4895 (__mmask8) __U);
756c5857
AI
4896}
4897#else
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Unmasked: passes
   an undefined vector and an all-ones mask to the builtin.  */
#define _mm512_srli_epi64(X, C) \
  ((__m512i) \
   __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (unsigned int)(C), \
                                  (__v8di)(__m512i)_mm512_undefined_epi32 (), \
                                  (__mmask8)-1))
756c5857 4903
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Passes W as the
   third operand and U as the mask to the builtin.  */
#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) \
   __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (unsigned int)(C), \
                                  (__v8di)(__m512i)(W), \
                                  (__mmask8)(U)))
756c5857 4909
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Passes the result
   of _mm512_setzero_si512 () as the third operand and U as the mask.  */
#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) \
   __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (unsigned int)(C), \
                                  (__v8di)(__m512i)_mm512_setzero_si512 (), \
                                  (__mmask8)(U)))
756c5857
AI
4915#endif
4916
756c5857
AI
4917extern __inline __m512i
4918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4919_mm512_srl_epi64 (__m512i __A, __m128i __B)
756c5857 4920{
79fb4764
HJ
4921 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
4922 (__v2di) __B,
4923 (__v8di)
4924 _mm512_undefined_epi32 (),
4925 (__mmask8) -1);
756c5857
AI
4926}
4927
4928extern __inline __m512i
4929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4930_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
756c5857 4931{
79fb4764
HJ
4932 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
4933 (__v2di) __B,
4934 (__v8di) __W,
4935 (__mmask8) __U);
756c5857
AI
4936}
4937
4938extern __inline __m512i
4939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4940_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
756c5857 4941{
79fb4764
HJ
4942 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
4943 (__v2di) __B,
4944 (__v8di)
4945 _mm512_setzero_si512 (),
4946 (__mmask8) __U);
756c5857
AI
4947}
4948
79fb4764 4949#ifdef __OPTIMIZE__
756c5857
AI
4950extern __inline __m512i
4951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4952_mm512_srai_epi64 (__m512i __A, unsigned int __B)
756c5857 4953{
79fb4764
HJ
4954 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
4955 (__v8di)
4956 _mm512_undefined_epi32 (),
4957 (__mmask8) -1);
756c5857
AI
4958}
4959
4960extern __inline __m512i
4961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
4962_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
4963 unsigned int __B)
756c5857 4964{
79fb4764
HJ
4965 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
4966 (__v8di) __W,
4967 (__mmask8) __U);
756c5857
AI
4968}
4969
4970extern __inline __m512i
4971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 4972_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
756c5857 4973{
79fb4764
HJ
4974 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
4975 (__v8di)
4976 _mm512_setzero_si512 (),
4977 (__mmask8) __U);
756c5857
AI
4978}
4979#else
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Unmasked: passes
   an undefined vector and an all-ones mask to the builtin.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) \
   __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (unsigned int)(C), \
                                  (__v8di)(__m512i)_mm512_undefined_epi32 (), \
                                  (__mmask8)-1))
756c5857 4985
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Passes W as the
   third operand and U as the mask to the builtin.  */
#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) \
   __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (unsigned int)(C), \
                                  (__v8di)(__m512i)(W), \
                                  (__mmask8)(U)))
756c5857 4991
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Passes the result
   of _mm512_setzero_si512 () as the third operand and U as the mask.  */
#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) \
   __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (unsigned int)(C), \
                                  (__v8di)(__m512i)_mm512_setzero_si512 (), \
                                  (__mmask8)(U)))
756c5857
AI
4997#endif
4998
756c5857
AI
4999extern __inline __m512i
5000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5001_mm512_sra_epi64 (__m512i __A, __m128i __B)
756c5857 5002{
79fb4764
HJ
5003 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
5004 (__v2di) __B,
5005 (__v8di)
5006 _mm512_undefined_epi32 (),
5007 (__mmask8) -1);
756c5857
AI
5008}
5009
5010extern __inline __m512i
5011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5012_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
756c5857 5013{
79fb4764
HJ
5014 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
5015 (__v2di) __B,
5016 (__v8di) __W,
5017 (__mmask8) __U);
756c5857
AI
5018}
5019
5020extern __inline __m512i
5021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5022_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
756c5857 5023{
79fb4764
HJ
5024 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
5025 (__v2di) __B,
5026 (__v8di)
5027 _mm512_setzero_si512 (),
5028 (__mmask8) __U);
756c5857
AI
5029}
5030
79fb4764 5031#ifdef __OPTIMIZE__
756c5857
AI
5032extern __inline __m512i
5033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5034_mm512_slli_epi32 (__m512i __A, unsigned int __B)
756c5857 5035{
79fb4764
HJ
5036 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
5037 (__v16si)
5038 _mm512_undefined_epi32 (),
5039 (__mmask16) -1);
756c5857
AI
5040}
5041
5042extern __inline __m512i
5043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5044_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
5045 unsigned int __B)
756c5857 5046{
79fb4764
HJ
5047 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
5048 (__v16si) __W,
5049 (__mmask16) __U);
756c5857
AI
5050}
5051
5052extern __inline __m512i
5053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5054_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
756c5857 5055{
79fb4764
HJ
5056 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
5057 (__v16si)
5058 _mm512_setzero_si512 (),
5059 (__mmask16) __U);
756c5857
AI
5060}
5061#else
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Unmasked: passes
   an undefined vector and an all-ones mask to the builtin.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) \
   __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (unsigned int)(C), \
                                  (__v16si)(__m512i)_mm512_undefined_epi32 (), \
                                  (__mmask16)-1))
756c5857 5067
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Passes W as the
   third operand and U as the mask to the builtin.  */
#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) \
   __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (unsigned int)(C), \
                                  (__v16si)(__m512i)(W), \
                                  (__mmask16)(U)))
756c5857 5073
79fb4764
HJ
/* Macro form, used when __OPTIMIZE__ is not defined.  Passes the result
   of _mm512_setzero_si512 () as the third operand and U as the mask.  */
#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) \
   __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (unsigned int)(C), \
                                  (__v16si)(__m512i)_mm512_setzero_si512 (), \
                                  (__mmask16)(U)))
5079#endif
756c5857 5080
79fb4764
HJ
5081extern __inline __m512i
5082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083_mm512_sll_epi32 (__m512i __A, __m128i __B)
5084{
5085 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
5086 (__v4si) __B,
5087 (__v16si)
5088 _mm512_undefined_epi32 (),
5089 (__mmask16) -1);
5090}
756c5857 5091
79fb4764
HJ
5092extern __inline __m512i
5093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5094_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5095{
5096 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
5097 (__v4si) __B,
5098 (__v16si) __W,
5099 (__mmask16) __U);
5100}
756c5857 5101
79fb4764 5102extern __inline __m512i
756c5857 5103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5104_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
756c5857 5105{
79fb4764
HJ
5106 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
5107 (__v4si) __B,
5108 (__v16si)
5109 _mm512_setzero_si512 (),
5110 (__mmask16) __U);
756c5857
AI
5111}
5112
756c5857 5113#ifdef __OPTIMIZE__
79fb4764 5114extern __inline __m512i
756c5857 5115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5116_mm512_srli_epi32 (__m512i __A, unsigned int __B)
756c5857 5117{
79fb4764
HJ
5118 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
5119 (__v16si)
5120 _mm512_undefined_epi32 (),
5121 (__mmask16) -1);
756c5857
AI
5122}
5123
79fb4764 5124extern __inline __m512i
756c5857 5125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5126_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
5127 __m512i __A, unsigned int __B)
756c5857 5128{
79fb4764
HJ
5129 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
5130 (__v16si) __W,
5131 (__mmask16) __U);
756c5857
AI
5132}
5133
79fb4764 5134extern __inline __m512i
756c5857 5135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5136_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
756c5857 5137{
79fb4764
HJ
5138 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
5139 (__v16si)
5140 _mm512_setzero_si512 (),
5141 (__mmask16) __U);
756c5857
AI
5142}
5143#else
79fb4764
HJ
/* Macro counterpart of the inline _mm512_srli_epi32, used in the #else
   branch: unmasked (all-ones mask, undefined merge source).  */
#define _mm512_srli_epi32(A, B) \
  ((__m512i) \
   __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(A), \
                                  (unsigned int)(B), \
                                  (__v16si)(__m512i)_mm512_undefined_epi32 (), \
                                  (__mmask16)-1))
756c5857 5149
79fb4764
HJ
/* Macro counterpart of the inline _mm512_mask_srli_epi32: SRC is the
   merge source and MASK the write mask.  */
#define _mm512_mask_srli_epi32(SRC, MASK, A, B) \
  ((__m512i) \
   __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(A), \
                                  (unsigned int)(B), \
                                  (__v16si)(__m512i)(SRC), \
                                  (__mmask16)(MASK)))
756c5857 5155
79fb4764
HJ
/* Macro counterpart of the inline _mm512_maskz_srli_epi32: the merge
   source is an all-zero vector.  */
#define _mm512_maskz_srli_epi32(MASK, A, B) \
  ((__m512i) \
   __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(A), \
                                  (unsigned int)(B), \
                                  (__v16si)(__m512i)_mm512_setzero_si512 (), \
                                  (__mmask16)(MASK)))
756c5857
AI
5161#endif
5162
79fb4764 5163extern __inline __m512i
756c5857 5164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5165_mm512_srl_epi32 (__m512i __A, __m128i __B)
756c5857 5166{
79fb4764
HJ
5167 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
5168 (__v4si) __B,
5169 (__v16si)
5170 _mm512_undefined_epi32 (),
5171 (__mmask16) -1);
756c5857
AI
5172}
5173
79fb4764 5174extern __inline __m512i
756c5857 5175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5176_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
756c5857 5177{
79fb4764
HJ
5178 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
5179 (__v4si) __B,
5180 (__v16si) __W,
5181 (__mmask16) __U);
756c5857
AI
5182}
5183
79fb4764 5184extern __inline __m512i
756c5857 5185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5186_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
756c5857 5187{
79fb4764
HJ
5188 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
5189 (__v4si) __B,
5190 (__v16si)
5191 _mm512_setzero_si512 (),
5192 (__mmask16) __U);
756c5857 5193}
756c5857 5194
756c5857 5195#ifdef __OPTIMIZE__
79fb4764 5196extern __inline __m512i
756c5857 5197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5198_mm512_srai_epi32 (__m512i __A, unsigned int __B)
756c5857 5199{
79fb4764
HJ
5200 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
5201 (__v16si)
5202 _mm512_undefined_epi32 (),
5203 (__mmask16) -1);
756c5857
AI
5204}
5205
79fb4764 5206extern __inline __m512i
756c5857 5207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5208_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
5209 unsigned int __B)
756c5857 5210{
79fb4764
HJ
5211 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
5212 (__v16si) __W,
5213 (__mmask16) __U);
756c5857
AI
5214}
5215
79fb4764 5216extern __inline __m512i
756c5857 5217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5218_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
756c5857 5219{
79fb4764
HJ
5220 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
5221 (__v16si)
5222 _mm512_setzero_si512 (),
5223 (__mmask16) __U);
756c5857
AI
5224}
5225#else
79fb4764
HJ
/* Macro counterpart of the inline _mm512_srai_epi32, used in the #else
   branch: unmasked (all-ones mask, undefined merge source).  */
#define _mm512_srai_epi32(A, B) \
  ((__m512i) \
   __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(A), \
                                  (unsigned int)(B), \
                                  (__v16si)(__m512i)_mm512_undefined_epi32 (), \
                                  (__mmask16)-1))
756c5857 5231
79fb4764
HJ
/* Macro counterpart of the inline _mm512_mask_srai_epi32: SRC is the
   merge source and MASK the write mask.  */
#define _mm512_mask_srai_epi32(SRC, MASK, A, B) \
  ((__m512i) \
   __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(A), \
                                  (unsigned int)(B), \
                                  (__v16si)(__m512i)(SRC), \
                                  (__mmask16)(MASK)))
756c5857 5237
79fb4764
HJ
/* Macro counterpart of the inline _mm512_maskz_srai_epi32: the merge
   source is an all-zero vector.  */
#define _mm512_maskz_srai_epi32(MASK, A, B) \
  ((__m512i) \
   __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(A), \
                                  (unsigned int)(B), \
                                  (__v16si)(__m512i)_mm512_setzero_si512 (), \
                                  (__mmask16)(MASK)))
756c5857
AI
5243#endif
5244
79fb4764 5245extern __inline __m512i
d256b866 5246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5247_mm512_sra_epi32 (__m512i __A, __m128i __B)
d256b866 5248{
79fb4764
HJ
5249 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
5250 (__v4si) __B,
5251 (__v16si)
5252 _mm512_undefined_epi32 (),
5253 (__mmask16) -1);
d256b866
IT
5254}
5255
79fb4764 5256extern __inline __m512i
756c5857 5257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5258_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
756c5857 5259{
79fb4764
HJ
5260 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
5261 (__v4si) __B,
5262 (__v16si) __W,
5263 (__mmask16) __U);
756c5857
AI
5264}
5265
79fb4764 5266extern __inline __m512i
756c5857 5267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5268_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
756c5857 5269{
79fb4764
HJ
5270 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
5271 (__v4si) __B,
5272 (__v16si)
5273 _mm512_setzero_si512 (),
5274 (__mmask16) __U);
756c5857
AI
5275}
5276
79fb4764
HJ
/* Constant helpers for building the immediate of the ternary logic
   operations on vectors A, B and C.  Reading a bit position i as the
   3-bit index (A << 2) | (B << 1) | C, each constant has bit i set
   exactly when its own input bit is set in i.  */
typedef enum
{
  _MM_TERNLOG_A = 0xF0,  /* 1111 0000: bit 2 of the index.  */
  _MM_TERNLOG_B = 0xCC,  /* 1100 1100: bit 1 of the index.  */
  _MM_TERNLOG_C = 0xAA   /* 1010 1010: bit 0 of the index.  */
} _MM_TERNLOG_ENUM;
756c5857 5285
79fb4764
HJ
5286#ifdef __OPTIMIZE__
5287extern __inline __m512i
d256b866 5288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5289_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
5290 const int __imm)
d256b866 5291{
79fb4764
HJ
5292 return (__m512i)
5293 __builtin_ia32_pternlogq512_mask ((__v8di) __A,
5294 (__v8di) __B,
5295 (__v8di) __C,
5296 (unsigned char) __imm,
5297 (__mmask8) -1);
d256b866
IT
5298}
5299
79fb4764 5300extern __inline __m512i
756c5857 5301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5302_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
5303 __m512i __C, const int __imm)
756c5857 5304{
79fb4764
HJ
5305 return (__m512i)
5306 __builtin_ia32_pternlogq512_mask ((__v8di) __A,
5307 (__v8di) __B,
5308 (__v8di) __C,
5309 (unsigned char) __imm,
5310 (__mmask8) __U);
756c5857
AI
5311}
5312
79fb4764 5313extern __inline __m512i
756c5857 5314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5315_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
5316 __m512i __C, const int __imm)
756c5857 5317{
79fb4764
HJ
5318 return (__m512i)
5319 __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
5320 (__v8di) __B,
5321 (__v8di) __C,
5322 (unsigned char) __imm,
5323 (__mmask8) __U);
756c5857
AI
5324}
5325
79fb4764 5326extern __inline __m512i
756c5857 5327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5328_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
5329 const int __imm)
756c5857 5330{
79fb4764
HJ
5331 return (__m512i)
5332 __builtin_ia32_pternlogd512_mask ((__v16si) __A,
5333 (__v16si) __B,
5334 (__v16si) __C,
5335 (unsigned char) __imm,
5336 (__mmask16) -1);
756c5857
AI
5337}
5338
79fb4764 5339extern __inline __m512i
d256b866 5340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5341_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
5342 __m512i __C, const int __imm)
d256b866 5343{
79fb4764
HJ
5344 return (__m512i)
5345 __builtin_ia32_pternlogd512_mask ((__v16si) __A,
5346 (__v16si) __B,
5347 (__v16si) __C,
5348 (unsigned char) __imm,
5349 (__mmask16) __U);
d256b866
IT
5350}
5351
79fb4764 5352extern __inline __m512i
756c5857 5353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5354_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
5355 __m512i __C, const int __imm)
756c5857 5356{
79fb4764
HJ
5357 return (__m512i)
5358 __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
5359 (__v16si) __B,
5360 (__v16si) __C,
5361 (unsigned char) __imm,
5362 (__mmask16) __U);
756c5857 5363}
79fb4764
HJ
5364#else
/* Macro counterpart of the inline _mm512_ternarylogic_epi64 (unmasked,
   64-bit lanes).  */
#define _mm512_ternarylogic_epi64(A, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), \
                                               (__v8di) (__m512i) (B), \
                                               (__v8di) (__m512i) (C), \
                                               (unsigned char) (IMM), \
                                               (__mmask8) -1))
/* Macro counterpart of the inline _mm512_mask_ternarylogic_epi64
   (write mask MASK, 64-bit lanes).  */
#define _mm512_mask_ternarylogic_epi64(A, MASK, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), \
                                               (__v8di) (__m512i) (B), \
                                               (__v8di) (__m512i) (C), \
                                               (unsigned char)(IMM), \
                                               (__mmask8) (MASK)))
/* Macro counterpart of the inline _mm512_maskz_ternarylogic_epi64
   (_maskz builtin, write mask MASK, 64-bit lanes).  */
#define _mm512_maskz_ternarylogic_epi64(MASK, A, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (A), \
                                                (__v8di) (__m512i) (B), \
                                                (__v8di) (__m512i) (C), \
                                                (unsigned char) (IMM), \
                                                (__mmask8) (MASK)))
/* Macro counterpart of the inline _mm512_ternarylogic_epi32 (unmasked,
   32-bit lanes).  */
#define _mm512_ternarylogic_epi32(A, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), \
                                               (__v16si) (__m512i) (B), \
                                               (__v16si) (__m512i) (C), \
                                               (unsigned char) (IMM), \
                                               (__mmask16) -1))
/* Macro counterpart of the inline _mm512_mask_ternarylogic_epi32
   (write mask MASK, 32-bit lanes).  */
#define _mm512_mask_ternarylogic_epi32(A, MASK, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), \
                                               (__v16si) (__m512i) (B), \
                                               (__v16si) (__m512i) (C), \
                                               (unsigned char) (IMM), \
                                               (__mmask16) (MASK)))
/* Macro counterpart of the inline _mm512_maskz_ternarylogic_epi32
   (_maskz builtin, write mask MASK, 32-bit lanes).  */
#define _mm512_maskz_ternarylogic_epi32(MASK, A, B, C, IMM) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (A), \
                                                (__v16si) (__m512i) (B), \
                                                (__v16si) (__m512i) (C), \
                                                (unsigned char) (IMM), \
                                                (__mmask16) (MASK)))
5407#endif
756c5857 5408
79fb4764 5409extern __inline __m512d
756c5857 5410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5411_mm512_rcp14_pd (__m512d __A)
756c5857 5412{
79fb4764
HJ
5413 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
5414 (__v8df)
5415 _mm512_undefined_pd (),
5416 (__mmask8) -1);
756c5857
AI
5417}
5418
79fb4764 5419extern __inline __m512d
756c5857 5420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5421_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
756c5857 5422{
79fb4764
HJ
5423 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
5424 (__v8df) __W,
5425 (__mmask8) __U);
756c5857
AI
5426}
5427
79fb4764 5428extern __inline __m512d
d256b866 5429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5430_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
d256b866 5431{
79fb4764
HJ
5432 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
5433 (__v8df)
5434 _mm512_setzero_pd (),
5435 (__mmask8) __U);
d256b866
IT
5436}
5437
79fb4764 5438extern __inline __m512
756c5857 5439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5440_mm512_rcp14_ps (__m512 __A)
756c5857 5441{
79fb4764
HJ
5442 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
5443 (__v16sf)
5444 _mm512_undefined_ps (),
5445 (__mmask16) -1);
756c5857
AI
5446}
5447
79fb4764 5448extern __inline __m512
756c5857 5449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5450_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
756c5857 5451{
79fb4764
HJ
5452 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
5453 (__v16sf) __W,
5454 (__mmask16) __U);
756c5857
AI
5455}
5456
79fb4764 5457extern __inline __m512
756c5857 5458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5459_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
756c5857 5460{
79fb4764
HJ
5461 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
5462 (__v16sf)
5463 _mm512_setzero_ps (),
5464 (__mmask16) __U);
756c5857
AI
5465}
5466
79fb4764 5467extern __inline __m512d
d256b866 5468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5469_mm512_rsqrt14_pd (__m512d __A)
d256b866 5470{
79fb4764
HJ
5471 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
5472 (__v8df)
5473 _mm512_undefined_pd (),
5474 (__mmask8) -1);
d256b866
IT
5475}
5476
79fb4764 5477extern __inline __m512d
756c5857 5478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5479_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
756c5857 5480{
79fb4764
HJ
5481 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
5482 (__v8df) __W,
5483 (__mmask8) __U);
756c5857
AI
5484}
5485
79fb4764 5486extern __inline __m512d
756c5857 5487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5488_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
756c5857 5489{
79fb4764
HJ
5490 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
5491 (__v8df)
5492 _mm512_setzero_pd (),
5493 (__mmask8) __U);
756c5857
AI
5494}
5495
79fb4764 5496extern __inline __m512
756c5857 5497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5498_mm512_rsqrt14_ps (__m512 __A)
756c5857 5499{
79fb4764
HJ
5500 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
5501 (__v16sf)
5502 _mm512_undefined_ps (),
756c5857
AI
5503 (__mmask16) -1);
5504}
5505
79fb4764 5506extern __inline __m512
d256b866 5507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5508_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
d256b866 5509{
79fb4764
HJ
5510 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
5511 (__v16sf) __W,
5512 (__mmask16) __U);
d256b866
IT
5513}
5514
79fb4764 5515extern __inline __m512
756c5857 5516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5517_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
756c5857 5518{
79fb4764
HJ
5519 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
5520 (__v16sf)
5521 _mm512_setzero_ps (),
5522 (__mmask16) __U);
756c5857
AI
5523}
5524
79fb4764
HJ
5525#ifdef __OPTIMIZE__
5526extern __inline __m512d
756c5857 5527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5528_mm512_sqrt_round_pd (__m512d __A, const int __R)
756c5857 5529{
79fb4764
HJ
5530 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
5531 (__v8df)
5532 _mm512_undefined_pd (),
5533 (__mmask8) -1, __R);
756c5857
AI
5534}
5535
79fb4764 5536extern __inline __m512d
756c5857 5537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5538_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
5539 const int __R)
756c5857 5540{
79fb4764
HJ
5541 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
5542 (__v8df) __W,
5543 (__mmask8) __U, __R);
756c5857
AI
5544}
5545
79fb4764 5546extern __inline __m512d
d256b866 5547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5548_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
d256b866 5549{
79fb4764
HJ
5550 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
5551 (__v8df)
5552 _mm512_setzero_pd (),
5553 (__mmask8) __U, __R);
d256b866
IT
5554}
5555
79fb4764 5556extern __inline __m512
756c5857 5557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5558_mm512_sqrt_round_ps (__m512 __A, const int __R)
756c5857 5559{
79fb4764
HJ
5560 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
5561 (__v16sf)
5562 _mm512_undefined_ps (),
5563 (__mmask16) -1, __R);
756c5857
AI
5564}
5565
79fb4764 5566extern __inline __m512
756c5857 5567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5568_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
756c5857 5569{
79fb4764
HJ
5570 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
5571 (__v16sf) __W,
5572 (__mmask16) __U, __R);
756c5857
AI
5573}
5574
79fb4764 5575extern __inline __m512
756c5857 5576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5577_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
756c5857 5578{
79fb4764
HJ
5579 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
5580 (__v16sf)
5581 _mm512_setzero_ps (),
5582 (__mmask16) __U, __R);
756c5857
AI
5583}
5584
79fb4764
HJ
5585#else
/* Macro counterpart of the inline _mm512_sqrt_round_pd.  The expansion
   is fully parenthesized and the operands are cast exactly as in the
   inline definition, so the macro composes safely with surrounding
   operators (a postfix operator would otherwise bind to the builtin
   call instead of the cast result).  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)_mm512_undefined_pd (), \
                                            (__mmask8)-1, (C)))
d256b866 5588
79fb4764
HJ
/* Macro counterpart of the inline _mm512_mask_sqrt_round_pd.  The
   expansion is fully parenthesized and every argument is parenthesized
   and cast as in the inline definition, so argument expressions and
   surrounding operators cannot change how the expansion parses.  */
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (C)))
5591
/* Macro counterpart of the inline _mm512_maskz_sqrt_round_pd.  The
   expansion is fully parenthesized and every argument is parenthesized
   and cast as in the inline definition, so argument expressions and
   surrounding operators cannot change how the expansion parses.  */
#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)_mm512_setzero_pd (), \
                                            (__mmask8)(U), (C)))
5594
/* Macro counterpart of the inline _mm512_sqrt_round_ps.  The expansion
   is fully parenthesized and the operands are cast exactly as in the
   inline definition, so the macro composes safely with surrounding
   operators (a postfix operator would otherwise bind to the builtin
   call instead of the cast result).  */
#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)_mm512_undefined_ps (), \
                                           (__mmask16)-1, (C)))
5597
/* Macro counterpart of the inline _mm512_mask_sqrt_round_ps.  The
   expansion is fully parenthesized and every argument is parenthesized
   and cast as in the inline definition, so argument expressions and
   surrounding operators cannot change how the expansion parses.  */
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (C)))
5600
/* Macro counterpart of the inline _mm512_maskz_sqrt_round_ps.  The
   expansion is fully parenthesized and every argument is parenthesized
   and cast as in the inline definition, so argument expressions and
   surrounding operators cannot change how the expansion parses.  */
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)_mm512_setzero_ps (), \
                                           (__mmask16)(U), (C)))
5603
5604#endif
5605
5606extern __inline __m512i
756c5857 5607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5608_mm512_cvtepi8_epi32 (__m128i __A)
756c5857 5609{
79fb4764
HJ
5610 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
5611 (__v16si)
5612 _mm512_undefined_epi32 (),
5613 (__mmask16) -1);
756c5857
AI
5614}
5615
79fb4764 5616extern __inline __m512i
756c5857 5617__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5618_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
756c5857 5619{
79fb4764
HJ
5620 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
5621 (__v16si) __W,
5622 (__mmask16) __U);
756c5857
AI
5623}
5624
79fb4764 5625extern __inline __m512i
756c5857 5626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5627_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
756c5857 5628{
79fb4764
HJ
5629 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
5630 (__v16si)
5631 _mm512_setzero_si512 (),
5632 (__mmask16) __U);
756c5857
AI
5633}
5634
79fb4764 5635extern __inline __m512i
d256b866 5636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5637_mm512_cvtepi8_epi64 (__m128i __A)
d256b866 5638{
79fb4764
HJ
5639 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
5640 (__v8di)
5641 _mm512_undefined_epi32 (),
5642 (__mmask8) -1);
d256b866
IT
5643}
5644
79fb4764 5645extern __inline __m512i
756c5857 5646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5647_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
756c5857 5648{
79fb4764
HJ
5649 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
5650 (__v8di) __W,
5651 (__mmask8) __U);
756c5857
AI
5652}
5653
79fb4764 5654extern __inline __m512i
756c5857 5655__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5656_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
5657{
5658 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
5659 (__v8di)
5660 _mm512_setzero_si512 (),
5661 (__mmask8) __U);
756c5857
AI
5662}
5663
79fb4764 5664extern __inline __m512i
756c5857 5665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5666_mm512_cvtepi16_epi32 (__m256i __A)
756c5857 5667{
79fb4764
HJ
5668 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
5669 (__v16si)
5670 _mm512_undefined_epi32 (),
5671 (__mmask16) -1);
756c5857
AI
5672}
5673
79fb4764 5674extern __inline __m512i
d256b866 5675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5676_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
d256b866 5677{
79fb4764
HJ
5678 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
5679 (__v16si) __W,
5680 (__mmask16) __U);
d256b866
IT
5681}
5682
79fb4764 5683extern __inline __m512i
756c5857 5684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5685_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
756c5857 5686{
79fb4764
HJ
5687 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
5688 (__v16si)
5689 _mm512_setzero_si512 (),
5690 (__mmask16) __U);
756c5857
AI
5691}
5692
79fb4764 5693extern __inline __m512i
756c5857 5694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5695_mm512_cvtepi16_epi64 (__m128i __A)
756c5857 5696{
79fb4764
HJ
5697 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
5698 (__v8di)
5699 _mm512_undefined_epi32 (),
5700 (__mmask8) -1);
756c5857
AI
5701}
5702
79fb4764 5703extern __inline __m512i
756c5857 5704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5705_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
756c5857 5706{
79fb4764
HJ
5707 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
5708 (__v8di) __W,
5709 (__mmask8) __U);
756c5857
AI
5710}
5711
79fb4764 5712extern __inline __m512i
d256b866 5713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5714_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
d256b866 5715{
79fb4764
HJ
5716 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
5717 (__v8di)
5718 _mm512_setzero_si512 (),
5719 (__mmask8) __U);
d256b866
IT
5720}
5721
79fb4764 5722extern __inline __m512i
756c5857 5723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5724_mm512_cvtepi32_epi64 (__m256i __X)
756c5857 5725{
79fb4764
HJ
5726 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
5727 (__v8di)
5728 _mm512_undefined_epi32 (),
5729 (__mmask8) -1);
756c5857
AI
5730}
5731
79fb4764 5732extern __inline __m512i
756c5857 5733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5734_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
756c5857 5735{
79fb4764
HJ
5736 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
5737 (__v8di) __W,
5738 (__mmask8) __U);
756c5857
AI
5739}
5740
79fb4764 5741extern __inline __m512i
756c5857 5742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5743_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
756c5857 5744{
79fb4764
HJ
5745 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
5746 (__v8di)
5747 _mm512_setzero_si512 (),
5748 (__mmask8) __U);
756c5857
AI
5749}
5750
79fb4764 5751extern __inline __m512i
d256b866 5752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5753_mm512_cvtepu8_epi32 (__m128i __A)
d256b866 5754{
79fb4764
HJ
5755 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
5756 (__v16si)
5757 _mm512_undefined_epi32 (),
5758 (__mmask16) -1);
d256b866
IT
5759}
5760
79fb4764 5761extern __inline __m512i
756c5857 5762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5763_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
756c5857 5764{
79fb4764
HJ
5765 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
5766 (__v16si) __W,
5767 (__mmask16) __U);
756c5857
AI
5768}
5769
79fb4764 5770extern __inline __m512i
756c5857 5771__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5772_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
756c5857 5773{
79fb4764
HJ
5774 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
5775 (__v16si)
5776 _mm512_setzero_si512 (),
5777 (__mmask16) __U);
756c5857
AI
5778}
5779
79fb4764 5780extern __inline __m512i
756c5857 5781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5782_mm512_cvtepu8_epi64 (__m128i __A)
756c5857 5783{
79fb4764
HJ
5784 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
5785 (__v8di)
5786 _mm512_undefined_epi32 (),
5787 (__mmask8) -1);
756c5857
AI
5788}
5789
79fb4764 5790extern __inline __m512i
d256b866 5791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5792_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
d256b866 5793{
79fb4764
HJ
5794 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
5795 (__v8di) __W,
5796 (__mmask8) __U);
d256b866
IT
5797}
5798
79fb4764 5799extern __inline __m512i
756c5857 5800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5801_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
756c5857 5802{
79fb4764
HJ
5803 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
5804 (__v8di)
5805 _mm512_setzero_si512 (),
5806 (__mmask8) __U);
756c5857
AI
5807}
5808
79fb4764 5809extern __inline __m512i
756c5857 5810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5811_mm512_cvtepu16_epi32 (__m256i __A)
756c5857 5812{
79fb4764
HJ
5813 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
5814 (__v16si)
5815 _mm512_undefined_epi32 (),
5816 (__mmask16) -1);
756c5857
AI
5817}
5818
79fb4764 5819extern __inline __m512i
756c5857 5820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5821_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
756c5857 5822{
79fb4764
HJ
5823 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
5824 (__v16si) __W,
5825 (__mmask16) __U);
756c5857
AI
5826}
5827
79fb4764 5828extern __inline __m512i
d256b866 5829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5830_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
d256b866 5831{
79fb4764
HJ
5832 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
5833 (__v16si)
5834 _mm512_setzero_si512 (),
5835 (__mmask16) __U);
d256b866
IT
5836}
5837
79fb4764 5838extern __inline __m512i
756c5857 5839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5840_mm512_cvtepu16_epi64 (__m128i __A)
756c5857 5841{
79fb4764
HJ
5842 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
5843 (__v8di)
5844 _mm512_undefined_epi32 (),
5845 (__mmask8) -1);
756c5857
AI
5846}
5847
79fb4764 5848extern __inline __m512i
756c5857 5849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5850_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
756c5857 5851{
79fb4764
HJ
5852 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
5853 (__v8di) __W,
5854 (__mmask8) __U);
756c5857
AI
5855}
5856
79fb4764 5857extern __inline __m512i
756c5857 5858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5859_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
756c5857 5860{
79fb4764
HJ
5861 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
5862 (__v8di)
5863 _mm512_setzero_si512 (),
5864 (__mmask8) __U);
756c5857
AI
5865}
5866
79fb4764 5867extern __inline __m512i
d256b866 5868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5869_mm512_cvtepu32_epi64 (__m256i __X)
d256b866 5870{
79fb4764
HJ
5871 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
5872 (__v8di)
5873 _mm512_undefined_epi32 (),
5874 (__mmask8) -1);
d256b866
IT
5875}
5876
79fb4764 5877extern __inline __m512i
756c5857 5878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5879_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
756c5857 5880{
79fb4764
HJ
5881 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
5882 (__v8di) __W,
5883 (__mmask8) __U);
756c5857
AI
5884}
5885
79fb4764 5886extern __inline __m512i
756c5857 5887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5888_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
756c5857 5889{
79fb4764
HJ
5890 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
5891 (__v8di)
5892 _mm512_setzero_si512 (),
5893 (__mmask8) __U);
756c5857
AI
5894}
5895
79fb4764 5896#ifdef __OPTIMIZE__
756c5857
AI
5897extern __inline __m512d
5898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5899_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
756c5857 5900{
79fb4764
HJ
5901 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
5902 (__v8df) __B,
5903 (__v8df)
5904 _mm512_undefined_pd (),
5905 (__mmask8) -1, __R);
756c5857
AI
5906}
5907
5908extern __inline __m512d
5909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5910_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
5911 __m512d __B, const int __R)
756c5857 5912{
79fb4764
HJ
5913 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
5914 (__v8df) __B,
5915 (__v8df) __W,
5916 (__mmask8) __U, __R);
756c5857
AI
5917}
5918
5919extern __inline __m512d
5920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5921_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
5922 const int __R)
756c5857 5923{
79fb4764
HJ
5924 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
5925 (__v8df) __B,
5926 (__v8df)
5927 _mm512_setzero_pd (),
5928 (__mmask8) __U, __R);
756c5857
AI
5929}
5930
79fb4764 5931extern __inline __m512
756c5857 5932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5933_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
756c5857 5934{
79fb4764
HJ
5935 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
5936 (__v16sf) __B,
5937 (__v16sf)
5938 _mm512_undefined_ps (),
5939 (__mmask16) -1, __R);
756c5857
AI
5940}
5941
79fb4764 5942extern __inline __m512
756c5857 5943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5944_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
5945 __m512 __B, const int __R)
756c5857 5946{
79fb4764
HJ
5947 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
5948 (__v16sf) __B,
5949 (__v16sf) __W,
5950 (__mmask16) __U, __R);
756c5857
AI
5951}
5952
79fb4764 5953extern __inline __m512
756c5857 5954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5955_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
756c5857 5956{
79fb4764
HJ
5957 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
5958 (__v16sf) __B,
5959 (__v16sf)
5960 _mm512_setzero_ps (),
5961 (__mmask16) __U, __R);
756c5857
AI
5962}
5963
79fb4764 5964extern __inline __m512d
756c5857 5965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 5966_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
756c5857 5967{
79fb4764
HJ
5968 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
5969 (__v8df) __B,
5970 (__v8df)
5971 _mm512_undefined_pd (),
5972 (__mmask8) -1, __R);
756c5857
AI
5973}
5974
79fb4764 5975extern __inline __m512d
756c5857 5976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5977_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
5978 __m512d __B, const int __R)
756c5857 5979{
79fb4764
HJ
5980 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
5981 (__v8df) __B,
5982 (__v8df) __W,
5983 (__mmask8) __U, __R);
756c5857
AI
5984}
5985
79fb4764 5986extern __inline __m512d
756c5857 5987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
5988_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
5989 const int __R)
756c5857 5990{
79fb4764
HJ
5991 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
5992 (__v8df) __B,
5993 (__v8df)
5994 _mm512_setzero_pd (),
5995 (__mmask8) __U, __R);
756c5857
AI
5996}
5997
5998extern __inline __m512
5999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6000_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
756c5857 6001{
79fb4764
HJ
6002 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
6003 (__v16sf) __B,
6004 (__v16sf)
6005 _mm512_undefined_ps (),
6006 (__mmask16) -1, __R);
756c5857
AI
6007}
6008
6009extern __inline __m512
6010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6011_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6012 __m512 __B, const int __R)
756c5857 6013{
79fb4764
HJ
6014 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
6015 (__v16sf) __B,
6016 (__v16sf) __W,
6017 (__mmask16) __U, __R);
756c5857
AI
6018}
6019
6020extern __inline __m512
6021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6022_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
756c5857 6023{
79fb4764
HJ
6024 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
6025 (__v16sf) __B,
6026 (__v16sf)
6027 _mm512_setzero_ps (),
6028 (__mmask16) __U, __R);
756c5857 6029}
756c5857 6030#else
79fb4764
HJ
/* Macro forms of the add_round_pd intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site, e.g. under a postfix operator or sizeof.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
                                           (__v8df) _mm512_undefined_pd (), \
                                           -1, (C)))

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((A), (B), \
                                           (__v8df) _mm512_setzero_pd (), \
                                           (U), (C)))
756c5857 6039
79fb4764
HJ
/* Macro forms of the add_round_ps intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
                                          (__v16sf) _mm512_undefined_ps (), \
                                          -1, (C)))

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((A), (B), \
                                          (__v16sf) _mm512_setzero_ps (), \
                                          (U), (C)))
6048
/* Macro forms of the sub_round_pd intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
                                           (__v8df) _mm512_undefined_pd (), \
                                           -1, (C)))

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((A), (B), \
                                           (__v8df) _mm512_setzero_pd (), \
                                           (U), (C)))
6057
/* Macro forms of the sub_round_ps intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
                                          (__v16sf) _mm512_undefined_ps (), \
                                          -1, (C)))

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((A), (B), \
                                          (__v16sf) _mm512_setzero_ps (), \
                                          (U), (C)))
756c5857
AI
6066#endif
6067
6068#ifdef __OPTIMIZE__
79fb4764 6069extern __inline __m512d
756c5857 6070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6071_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
756c5857 6072{
79fb4764
HJ
6073 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
6074 (__v8df) __B,
6075 (__v8df)
6076 _mm512_undefined_pd (),
6077 (__mmask8) -1, __R);
756c5857
AI
6078}
6079
79fb4764 6080extern __inline __m512d
756c5857 6081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6082_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
6083 __m512d __B, const int __R)
756c5857 6084{
79fb4764
HJ
6085 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
6086 (__v8df) __B,
6087 (__v8df) __W,
6088 (__mmask8) __U, __R);
756c5857
AI
6089}
6090
79fb4764 6091extern __inline __m512d
756c5857 6092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6093_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6094 const int __R)
756c5857 6095{
79fb4764
HJ
6096 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
6097 (__v8df) __B,
6098 (__v8df)
6099 _mm512_setzero_pd (),
6100 (__mmask8) __U, __R);
756c5857
AI
6101}
6102
79fb4764 6103extern __inline __m512
756c5857 6104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6105_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
756c5857 6106{
79fb4764
HJ
6107 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
6108 (__v16sf) __B,
6109 (__v16sf)
6110 _mm512_undefined_ps (),
6111 (__mmask16) -1, __R);
756c5857
AI
6112}
6113
79fb4764 6114extern __inline __m512
756c5857 6115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6116_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6117 __m512 __B, const int __R)
6118{
6119 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
6120 (__v16sf) __B,
6121 (__v16sf) __W,
6122 (__mmask16) __U, __R);
756c5857
AI
6123}
6124
79fb4764 6125extern __inline __m512
756c5857 6126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6127_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
756c5857 6128{
79fb4764
HJ
6129 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
6130 (__v16sf) __B,
6131 (__v16sf)
6132 _mm512_setzero_ps (),
6133 (__mmask16) __U, __R);
756c5857
AI
6134}
6135
79fb4764 6136extern __inline __m512d
756c5857 6137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6138_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
756c5857 6139{
79fb4764
HJ
6140 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
6141 (__v8df) __V,
6142 (__v8df)
6143 _mm512_undefined_pd (),
6144 (__mmask8) -1, __R);
756c5857
AI
6145}
6146
79fb4764 6147extern __inline __m512d
756c5857 6148__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6149_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
6150 __m512d __V, const int __R)
756c5857 6151{
79fb4764
HJ
6152 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
6153 (__v8df) __V,
6154 (__v8df) __W,
6155 (__mmask8) __U, __R);
756c5857
AI
6156}
6157
79fb4764 6158extern __inline __m512d
756c5857 6159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6160_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
6161 const int __R)
756c5857 6162{
79fb4764
HJ
6163 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
6164 (__v8df) __V,
6165 (__v8df)
6166 _mm512_setzero_pd (),
6167 (__mmask8) __U, __R);
756c5857
AI
6168}
6169
79fb4764 6170extern __inline __m512
756c5857 6171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6172_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
756c5857 6173{
79fb4764
HJ
6174 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
6175 (__v16sf) __B,
6176 (__v16sf)
6177 _mm512_undefined_ps (),
6178 (__mmask16) -1, __R);
756c5857
AI
6179}
6180
79fb4764 6181extern __inline __m512
756c5857 6182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6183_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6184 __m512 __B, const int __R)
756c5857 6185{
79fb4764
HJ
6186 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
6187 (__v16sf) __B,
6188 (__v16sf) __W,
6189 (__mmask16) __U, __R);
756c5857
AI
6190}
6191
79fb4764 6192extern __inline __m512
756c5857 6193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6194_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
756c5857 6195{
79fb4764
HJ
6196 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
6197 (__v16sf) __B,
6198 (__v16sf)
6199 _mm512_setzero_ps (),
6200 (__mmask16) __U, __R);
756c5857 6201}
79fb4764 6202
756c5857 6203#else
79fb4764
HJ
/* Macro forms of the mul_round_pd intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
                                           (__v8df) _mm512_undefined_pd (), \
                                           -1, (C)))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), \
                                           (__v8df) _mm512_setzero_pd (), \
                                           (U), (C)))
756c5857 6212
79fb4764
HJ
/* Macro forms of the mul_round_ps intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
                                          (__v16sf) _mm512_undefined_ps (), \
                                          -1, (C)))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), \
                                          (__v16sf) _mm512_setzero_ps (), \
                                          (U), (C)))
756c5857 6221
79fb4764
HJ
/* Macro forms of the div_round_pd intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
                                           (__v8df) _mm512_undefined_pd (), \
                                           -1, (C)))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), \
                                           (__v8df) _mm512_setzero_pd (), \
                                           (U), (C)))
756c5857 6230
79fb4764
HJ
/* Macro forms of the div_round_ps intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_div_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
                                          (__v16sf) _mm512_undefined_ps (), \
                                          -1, (C)))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), \
                                          (__v16sf) _mm512_setzero_ps (), \
                                          (U), (C)))
756c5857 6239
756c5857
AI
6240#endif
6241
6242#ifdef __OPTIMIZE__
79fb4764 6243extern __inline __m512d
756c5857 6244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6245_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
756c5857 6246{
79fb4764
HJ
6247 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
6248 (__v8df) __B,
6249 (__v8df)
6250 _mm512_undefined_pd (),
6251 (__mmask8) -1, __R);
6252}
6253
6254extern __inline __m512d
6255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6256_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
6257 __m512d __B, const int __R)
6258{
6259 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
6260 (__v8df) __B,
6261 (__v8df) __W,
6262 (__mmask8) __U, __R);
6263}
6264
6265extern __inline __m512d
6266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6267_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6268 const int __R)
6269{
6270 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
6271 (__v8df) __B,
6272 (__v8df)
6273 _mm512_setzero_pd (),
6274 (__mmask8) __U, __R);
756c5857
AI
6275}
6276
6277extern __inline __m512
6278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6279_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
756c5857 6280{
79fb4764
HJ
6281 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
6282 (__v16sf) __B,
6283 (__v16sf)
6284 _mm512_undefined_ps (),
6285 (__mmask16) -1, __R);
6286}
6287
6288extern __inline __m512
6289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6291 __m512 __B, const int __R)
6292{
6293 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
6294 (__v16sf) __B,
6295 (__v16sf) __W,
6296 (__mmask16) __U, __R);
6297}
6298
6299extern __inline __m512
6300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6301_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
6302{
6303 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
6304 (__v16sf) __B,
6305 (__v16sf)
6306 _mm512_setzero_ps (),
6307 (__mmask16) __U, __R);
756c5857
AI
6308}
6309
79fb4764 6310extern __inline __m512d
756c5857 6311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6312_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
756c5857 6313{
79fb4764
HJ
6314 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
6315 (__v8df) __B,
6316 (__v8df)
6317 _mm512_undefined_pd (),
6318 (__mmask8) -1, __R);
756c5857
AI
6319}
6320
79fb4764 6321extern __inline __m512d
756c5857 6322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6323_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
6324 __m512d __B, const int __R)
756c5857 6325{
79fb4764
HJ
6326 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
6327 (__v8df) __B,
6328 (__v8df) __W,
6329 (__mmask8) __U, __R);
756c5857
AI
6330}
6331
79fb4764 6332extern __inline __m512d
756c5857 6333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6334_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6335 const int __R)
756c5857 6336{
79fb4764
HJ
6337 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
6338 (__v8df) __B,
6339 (__v8df)
6340 _mm512_setzero_pd (),
6341 (__mmask8) __U, __R);
756c5857
AI
6342}
6343
79fb4764 6344extern __inline __m512
756c5857 6345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6346_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
756c5857 6347{
79fb4764
HJ
6348 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
6349 (__v16sf) __B,
6350 (__v16sf)
6351 _mm512_undefined_ps (),
6352 (__mmask16) -1, __R);
756c5857
AI
6353}
6354
79fb4764 6355extern __inline __m512
756c5857 6356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6357_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6358 __m512 __B, const int __R)
756c5857 6359{
79fb4764
HJ
6360 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
6361 (__v16sf) __B,
6362 (__v16sf) __W,
6363 (__mmask16) __U, __R);
756c5857
AI
6364}
6365
79fb4764 6366extern __inline __m512
756c5857 6367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6368_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
756c5857 6369{
79fb4764
HJ
6370 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
6371 (__v16sf) __B,
6372 (__v16sf)
6373 _mm512_setzero_ps (),
6374 (__mmask16) __U, __R);
756c5857
AI
6375}
6376#else
79fb4764
HJ
/* Macro forms of the max_round_pd intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
                                           (__v8df) _mm512_undefined_pd (), \
                                           -1, (R)))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), \
                                           (__v8df) _mm512_setzero_pd (), \
                                           (U), (R)))
756c5857 6385
79fb4764
HJ
/* Macro forms of the max_round_ps intrinsics (the #else side of the
   surrounding conditional).  The unmasked form takes its placeholder
   operand from _mm512_undefined_ps (), matching the inline definition of
   the same intrinsic, rather than from the double-precision helper.
   Arguments and the whole expansion are parenthesized so that the result
   is a single primary expression at the use site.  */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
                                          (__v16sf) _mm512_undefined_ps (), \
                                          -1, (R)))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), \
                                          (__v16sf) _mm512_setzero_ps (), \
                                          (U), (R)))
756c5857 6394
79fb4764
HJ
/* Macro forms of the min_round_pd intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
                                           (__v8df) _mm512_undefined_pd (), \
                                           -1, (R)))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), \
                                           (__v8df) _mm512_setzero_pd (), \
                                           (U), (R)))
6403
/* Macro forms of the min_round_ps intrinsics (the #else side of the
   surrounding conditional).  Arguments and the whole expansion are
   parenthesized so that the result is a single primary expression at
   the use site.  */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
                                          (__v16sf) _mm512_undefined_ps (), \
                                          -1, (R)))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), \
                                          (__v16sf) _mm512_setzero_ps (), \
                                          (U), (R)))
756c5857
AI
6412#endif
6413
79fb4764 6414#ifdef __OPTIMIZE__
756c5857
AI
6415extern __inline __m512d
6416__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6417_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
756c5857 6418{
79fb4764
HJ
6419 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6420 (__v8df) __B,
6421 (__v8df)
6422 _mm512_undefined_pd (),
6423 (__mmask8) -1, __R);
756c5857
AI
6424}
6425
6426extern __inline __m512d
6427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6428_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
6429 __m512d __B, const int __R)
756c5857 6430{
79fb4764
HJ
6431 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6432 (__v8df) __B,
6433 (__v8df) __W,
6434 (__mmask8) __U, __R);
756c5857
AI
6435}
6436
6437extern __inline __m512d
6438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6439_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6440 const int __R)
756c5857 6441{
79fb4764
HJ
6442 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6443 (__v8df) __B,
6444 (__v8df)
6445 _mm512_setzero_pd (),
6446 (__mmask8) __U, __R);
756c5857
AI
6447}
6448
79fb4764 6449extern __inline __m512
756c5857 6450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6451_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
756c5857 6452{
79fb4764
HJ
6453 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6454 (__v16sf) __B,
6455 (__v16sf)
6456 _mm512_undefined_ps (),
6457 (__mmask16) -1, __R);
756c5857
AI
6458}
6459
79fb4764 6460extern __inline __m512
756c5857 6461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6462_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6463 __m512 __B, const int __R)
756c5857 6464{
79fb4764
HJ
6465 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6466 (__v16sf) __B,
6467 (__v16sf) __W,
6468 (__mmask16) __U, __R);
6469}
6470
6471extern __inline __m512
6472__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6473_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6474 const int __R)
6475{
6476 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6477 (__v16sf) __B,
6478 (__v16sf)
6479 _mm512_setzero_ps (),
6480 (__mmask16) __U, __R);
756c5857
AI
6481}
6482
79fb4764
HJ
6483#else
/* Macro forms of the scalef_round_pd intrinsics (the #else side of the
   surrounding conditional); each forwards its arguments to
   __builtin_ia32_scalefpd512_mask.  */
#define _mm512_scalef_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
     (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  ((__m512d) \
   __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), \
     (__v8df) _mm512_setzero_pd (), (U), (C)))
6498
/* Macro forms of the scalef_round_ps intrinsics (the #else side of the
   surrounding conditional); each forwards its arguments to
   __builtin_ia32_scalefps512_mask.  */
#define _mm512_scalef_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
     (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  ((__m512) \
   __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), \
     (__v16sf) _mm512_setzero_ps (), (U), (C)))
6513
6514#endif
756c5857 6515
79fb4764
HJ
6516#ifdef __OPTIMIZE__
6517extern __inline __m512d
756c5857 6518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6519_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
756c5857 6520{
79fb4764
HJ
6521 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
6522 (__v8df) __B,
6523 (__v8df) __C,
6524 (__mmask8) -1, __R);
756c5857
AI
6525}
6526
79fb4764 6527extern __inline __m512d
756c5857 6528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6529_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6530 __m512d __C, const int __R)
756c5857 6531{
79fb4764
HJ
6532 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
6533 (__v8df) __B,
6534 (__v8df) __C,
6535 (__mmask8) __U, __R);
756c5857
AI
6536}
6537
79fb4764 6538extern __inline __m512d
756c5857 6539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6540_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
6541 __mmask8 __U, const int __R)
756c5857 6542{
79fb4764
HJ
6543 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
6544 (__v8df) __B,
6545 (__v8df) __C,
6546 (__mmask8) __U, __R);
756c5857
AI
6547}
6548
79fb4764 6549extern __inline __m512d
756c5857 6550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6551_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6552 __m512d __C, const int __R)
756c5857 6553{
79fb4764
HJ
6554 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
6555 (__v8df) __B,
6556 (__v8df) __C,
6557 (__mmask8) __U, __R);
756c5857
AI
6558}
6559
79fb4764 6560extern __inline __m512
459d21c6 6561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6562_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
459d21c6 6563{
79fb4764
HJ
6564 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
6565 (__v16sf) __B,
6566 (__v16sf) __C,
6567 (__mmask16) -1, __R);
459d21c6
JJ
6568}
6569
79fb4764 6570extern __inline __m512
459d21c6 6571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6572_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6573 __m512 __C, const int __R)
459d21c6 6574{
79fb4764
HJ
6575 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
6576 (__v16sf) __B,
6577 (__v16sf) __C,
6578 (__mmask16) __U, __R);
459d21c6
JJ
6579}
6580
79fb4764 6581extern __inline __m512
459d21c6 6582__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6583_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
6584 __mmask16 __U, const int __R)
459d21c6 6585{
79fb4764
HJ
6586 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
6587 (__v16sf) __B,
6588 (__v16sf) __C,
6589 (__mmask16) __U, __R);
459d21c6
JJ
6590}
6591
79fb4764 6592extern __inline __m512
459d21c6 6593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6594_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6595 __m512 __C, const int __R)
459d21c6 6596{
79fb4764
HJ
6597 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
6598 (__v16sf) __B,
6599 (__v16sf) __C,
6600 (__mmask16) __U, __R);
459d21c6
JJ
6601}
6602
79fb4764 6603extern __inline __m512d
459d21c6 6604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6605_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
459d21c6 6606{
79fb4764
HJ
6607 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
6608 (__v8df) __B,
6609 (__v8df) __C,
6610 (__mmask8) -1, __R);
459d21c6
JJ
6611}
6612
79fb4764 6613extern __inline __m512d
459d21c6 6614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6615_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6616 __m512d __C, const int __R)
459d21c6 6617{
79fb4764
HJ
6618 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
6619 (__v8df) __B,
6620 (__v8df) __C,
6621 (__mmask8) __U, __R);
459d21c6
JJ
6622}
6623
79fb4764 6624extern __inline __m512d
459d21c6 6625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6626_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
6627 __mmask8 __U, const int __R)
459d21c6 6628{
79fb4764
HJ
6629 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
6630 (__v8df) __B,
6631 (__v8df) __C,
6632 (__mmask8) __U, __R);
459d21c6
JJ
6633}
6634
79fb4764 6635extern __inline __m512d
459d21c6 6636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6637_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6638 __m512d __C, const int __R)
459d21c6 6639{
79fb4764
HJ
6640 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
6641 (__v8df) __B,
6642 (__v8df) __C,
6643 (__mmask8) __U, __R);
459d21c6
JJ
6644}
6645
79fb4764 6646extern __inline __m512
459d21c6 6647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6648_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
459d21c6 6649{
79fb4764
HJ
6650 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
6651 (__v16sf) __B,
6652 (__v16sf) __C,
6653 (__mmask16) -1, __R);
459d21c6
JJ
6654}
6655
79fb4764 6656extern __inline __m512
459d21c6 6657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6658_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6659 __m512 __C, const int __R)
459d21c6 6660{
79fb4764
HJ
6661 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
6662 (__v16sf) __B,
6663 (__v16sf) __C,
6664 (__mmask16) __U, __R);
459d21c6
JJ
6665}
6666
79fb4764 6667extern __inline __m512
4c98bdad 6668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6669_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
6670 __mmask16 __U, const int __R)
4c98bdad 6671{
79fb4764
HJ
6672 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
6673 (__v16sf) __B,
6674 (__v16sf) __C,
6675 (__mmask16) __U, __R);
4c98bdad
SP
6676}
6677
79fb4764 6678extern __inline __m512
756c5857 6679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6680_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6681 __m512 __C, const int __R)
756c5857 6682{
79fb4764
HJ
6683 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
6684 (__v16sf) __B,
6685 (__v16sf) __C,
6686 (__mmask16) __U, __R);
756c5857
AI
6687}
6688
79fb4764 6689extern __inline __m512d
756c5857 6690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6691_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
756c5857 6692{
79fb4764
HJ
6693 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
6694 (__v8df) __B,
6695 (__v8df) __C,
6696 (__mmask8) -1, __R);
756c5857
AI
6697}
6698
79fb4764 6699extern __inline __m512d
4c98bdad 6700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6701_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6702 __m512d __C, const int __R)
4c98bdad 6703{
79fb4764
HJ
6704 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
6705 (__v8df) __B,
6706 (__v8df) __C,
6707 (__mmask8) __U, __R);
4c98bdad
SP
6708}
6709
79fb4764 6710extern __inline __m512d
756c5857 6711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6712_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
6713 __mmask8 __U, const int __R)
756c5857 6714{
79fb4764
HJ
6715 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
6716 (__v8df) __B,
6717 (__v8df) __C,
6718 (__mmask8) __U, __R);
756c5857
AI
6719}
6720
79fb4764 6721extern __inline __m512d
756c5857 6722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6723_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6724 __m512d __C, const int __R)
756c5857 6725{
79fb4764
HJ
6726 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
6727 (__v8df) __B,
6728 (__v8df) __C,
6729 (__mmask8) __U, __R);
756c5857
AI
6730}
6731
79fb4764 6732extern __inline __m512
4c98bdad 6733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6734_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
4c98bdad 6735{
79fb4764
HJ
6736 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
6737 (__v16sf) __B,
6738 (__v16sf) __C,
6739 (__mmask16) -1, __R);
4c98bdad
SP
6740}
6741
79fb4764 6742extern __inline __m512
756c5857 6743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6744_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6745 __m512 __C, const int __R)
756c5857 6746{
79fb4764
HJ
6747 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
6748 (__v16sf) __B,
6749 (__v16sf) __C,
6750 (__mmask16) __U, __R);
756c5857
AI
6751}
6752
79fb4764 6753extern __inline __m512
756c5857 6754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6755_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
6756 __mmask16 __U, const int __R)
756c5857 6757{
79fb4764
HJ
6758 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
6759 (__v16sf) __B,
6760 (__v16sf) __C,
6761 (__mmask16) __U, __R);
756c5857
AI
6762}
6763
79fb4764 6764extern __inline __m512
756c5857 6765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6766_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6767 __m512 __C, const int __R)
756c5857 6768{
79fb4764
HJ
6769 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
6770 (__v16sf) __B,
6771 (__v16sf) __C,
6772 (__mmask16) __U, __R);
756c5857
AI
6773}
6774
79fb4764 6775extern __inline __m512d
4c98bdad 6776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6777_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
4c98bdad 6778{
79fb4764
HJ
6779 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
6780 (__v8df) __B,
6781 -(__v8df) __C,
6782 (__mmask8) -1, __R);
4c98bdad
SP
6783}
6784
79fb4764 6785extern __inline __m512d
756c5857 6786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6787_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6788 __m512d __C, const int __R)
756c5857 6789{
79fb4764
HJ
6790 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
6791 (__v8df) __B,
6792 -(__v8df) __C,
6793 (__mmask8) __U, __R);
756c5857
AI
6794}
6795
6796extern __inline __m512d
6797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6798_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
6799 __mmask8 __U, const int __R)
756c5857 6800{
79fb4764
HJ
6801 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
6802 (__v8df) __B,
6803 (__v8df) __C,
6804 (__mmask8) __U, __R);
756c5857
AI
6805}
6806
6807extern __inline __m512d
6808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6809_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6810 __m512d __C, const int __R)
756c5857 6811{
79fb4764
HJ
6812 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
6813 (__v8df) __B,
6814 -(__v8df) __C,
6815 (__mmask8) __U, __R);
756c5857
AI
6816}
6817
79fb4764 6818extern __inline __m512
756c5857 6819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6820_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
756c5857 6821{
79fb4764
HJ
6822 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
6823 (__v16sf) __B,
6824 -(__v16sf) __C,
6825 (__mmask16) -1, __R);
756c5857
AI
6826}
6827
6828extern __inline __m512
6829__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6830_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6831 __m512 __C, const int __R)
756c5857 6832{
79fb4764
HJ
6833 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
6834 (__v16sf) __B,
6835 -(__v16sf) __C,
6836 (__mmask16) __U, __R);
756c5857
AI
6837}
6838
6839extern __inline __m512
6840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6841_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
6842 __mmask16 __U, const int __R)
756c5857 6843{
79fb4764
HJ
6844 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
6845 (__v16sf) __B,
6846 (__v16sf) __C,
6847 (__mmask16) __U, __R);
756c5857
AI
6848}
6849
6850extern __inline __m512
6851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6852_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6853 __m512 __C, const int __R)
756c5857 6854{
79fb4764
HJ
6855 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
6856 (__v16sf) __B,
6857 -(__v16sf) __C,
6858 (__mmask16) __U, __R);
756c5857
AI
6859}
6860
79fb4764 6861extern __inline __m512d
756c5857 6862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6863_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
756c5857 6864{
79fb4764
HJ
6865 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
6866 (__v8df) __B,
6867 (__v8df) __C,
6868 (__mmask8) -1, __R);
756c5857
AI
6869}
6870
79fb4764 6871extern __inline __m512d
756c5857 6872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6873_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6874 __m512d __C, const int __R)
756c5857 6875{
79fb4764
HJ
6876 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
6877 (__v8df) __B,
6878 (__v8df) __C,
6879 (__mmask8) __U, __R);
756c5857
AI
6880}
6881
79fb4764 6882extern __inline __m512d
756c5857 6883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6884_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
6885 __mmask8 __U, const int __R)
756c5857 6886{
79fb4764
HJ
6887 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
6888 (__v8df) __B,
6889 (__v8df) __C,
6890 (__mmask8) __U, __R);
756c5857
AI
6891}
6892
79fb4764 6893extern __inline __m512d
756c5857 6894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6895_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6896 __m512d __C, const int __R)
756c5857 6897{
79fb4764
HJ
6898 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
6899 (__v8df) __B,
6900 (__v8df) __C,
6901 (__mmask8) __U, __R);
756c5857
AI
6902}
6903
79fb4764 6904extern __inline __m512
756c5857 6905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6906_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
756c5857 6907{
79fb4764
HJ
6908 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
6909 (__v16sf) __B,
6910 (__v16sf) __C,
6911 (__mmask16) -1, __R);
756c5857
AI
6912}
6913
79fb4764 6914extern __inline __m512
756c5857 6915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6916_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6917 __m512 __C, const int __R)
756c5857 6918{
79fb4764
HJ
6919 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
6920 (__v16sf) __B,
6921 (__v16sf) __C,
6922 (__mmask16) __U, __R);
756c5857
AI
6923}
6924
79fb4764 6925extern __inline __m512
756c5857 6926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6927_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
6928 __mmask16 __U, const int __R)
756c5857 6929{
79fb4764
HJ
6930 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
6931 (__v16sf) __B,
6932 (__v16sf) __C,
6933 (__mmask16) __U, __R);
756c5857
AI
6934}
6935
79fb4764 6936extern __inline __m512
756c5857 6937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6938_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6939 __m512 __C, const int __R)
756c5857 6940{
79fb4764
HJ
6941 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
6942 (__v16sf) __B,
6943 (__v16sf) __C,
6944 (__mmask16) __U, __R);
756c5857
AI
6945}
6946
6947extern __inline __m512d
6948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6949_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
756c5857 6950{
79fb4764
HJ
6951 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
6952 (__v8df) __B,
6953 (__v8df) __C,
6954 (__mmask8) -1, __R);
756c5857
AI
6955}
6956
6957extern __inline __m512d
6958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6959_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6960 __m512d __C, const int __R)
756c5857 6961{
79fb4764
HJ
6962 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
6963 (__v8df) __B,
6964 (__v8df) __C,
6965 (__mmask8) __U, __R);
756c5857
AI
6966}
6967
6968extern __inline __m512d
6969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6970_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
6971 __mmask8 __U, const int __R)
756c5857 6972{
79fb4764
HJ
6973 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
6974 (__v8df) __B,
6975 (__v8df) __C,
6976 (__mmask8) __U, __R);
756c5857
AI
6977}
6978
6979extern __inline __m512d
6980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
6981_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6982 __m512d __C, const int __R)
756c5857 6983{
79fb4764
HJ
6984 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
6985 (__v8df) __B,
6986 (__v8df) __C,
6987 (__mmask8) __U, __R);
756c5857
AI
6988}
6989
6990extern __inline __m512
6991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 6992_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
756c5857 6993{
79fb4764
HJ
6994 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
6995 (__v16sf) __B,
6996 (__v16sf) __C,
6997 (__mmask16) -1, __R);
756c5857
AI
6998}
6999
7000extern __inline __m512
7001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7002_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
7003 __m512 __C, const int __R)
756c5857 7004{
79fb4764
HJ
7005 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
7006 (__v16sf) __B,
7007 (__v16sf) __C,
7008 (__mmask16) __U, __R);
756c5857
AI
7009}
7010
7011extern __inline __m512
7012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7013_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
7014 __mmask16 __U, const int __R)
756c5857 7015{
79fb4764
HJ
7016 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
7017 (__v16sf) __B,
7018 (__v16sf) __C,
7019 (__mmask16) __U, __R);
756c5857
AI
7020}
7021
7022extern __inline __m512
7023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7024_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
7025 __m512 __C, const int __R)
756c5857 7026{
79fb4764
HJ
7027 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
7028 (__v16sf) __B,
7029 (__v16sf) __C,
7030 (__mmask16) __U, __R);
756c5857 7031}
79fb4764
HJ
7032#else
/* Macro forms of the *_fmadd_round_{pd,ps} intrinsics.  Every expansion
   is wrapped in an outer pair of parentheses so that the cast and the
   builtin call form one primary expression; without them a postfix
   operator or sizeof applied to the macro result would bind to the
   builtin call instead of to the cast result.  */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, C, U, R))

#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R))
7056
/* Macro forms of the *_fmsub_round_{pd,ps} intrinsics.  Every expansion
   is wrapped in an outer pair of parentheses so that the cast and the
   builtin call form one primary expression; without them a postfix
   operator or sizeof applied to the macro result would bind to the
   builtin call instead of to the cast result.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmsubps512_maskz (A, B, C, U, R))
7080
/* Macro forms of the *_fmaddsub_round_{pd,ps} intrinsics.  Every
   expansion is wrapped in an outer pair of parentheses so that the cast
   and the builtin call form one primary expression; without them a
   postfix operator or sizeof applied to the macro result would bind to
   the builtin call instead of to the cast result.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, C, U, R))
7104
/* Macro forms of the *_fmsubadd_round_{pd,ps} intrinsics.  Except for the
   mask3 variants, which use dedicated vfmsubadd builtins, these are built
   on the fmaddsub builtins with C negated.  Every expansion is wrapped in
   an outer pair of parentheses so that the cast and the builtin call form
   one primary expression; without them a postfix operator or sizeof
   applied to the macro result would bind to the builtin call instead of
   to the cast result.  */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, -(C), U, R))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_maskz (A, B, -(C), U, R))
7128
/* Macro forms of the *_fnmadd_round_{pd,ps} intrinsics.  Every expansion
   is wrapped in an outer pair of parentheses so that the cast and the
   builtin call form one primary expression; without them a postfix
   operator or sizeof applied to the macro result would bind to the
   builtin call instead of to the cast result.  */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmaddpd512_maskz (A, B, C, U, R))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmaddps512_maskz (A, B, C, U, R))
7152
/* Macro forms of the *_fnmsub_round_{pd,ps} intrinsics.  Every expansion
   is wrapped in an outer pair of parentheses so that the cast and the
   builtin call form one primary expression; without them a postfix
   operator or sizeof applied to the macro result would bind to the
   builtin call instead of to the cast result.  */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfnmsubpd512_maskz (A, B, C, U, R))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfnmsubps512_maskz (A, B, C, U, R))
7176#endif
7177
7178extern __inline __m512i
756c5857 7179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7180_mm512_abs_epi64 (__m512i __A)
756c5857 7181{
79fb4764
HJ
7182 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
7183 (__v8di)
7184 _mm512_undefined_epi32 (),
7185 (__mmask8) -1);
756c5857
AI
7186}
7187
79fb4764 7188extern __inline __m512i
756c5857 7189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7190_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
756c5857 7191{
79fb4764
HJ
7192 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
7193 (__v8di) __W,
7194 (__mmask8) __U);
756c5857
AI
7195}
7196
79fb4764 7197extern __inline __m512i
756c5857 7198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7199_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
756c5857 7200{
79fb4764
HJ
7201 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
7202 (__v8di)
7203 _mm512_setzero_si512 (),
7204 (__mmask8) __U);
756c5857
AI
7205}
7206
79fb4764 7207extern __inline __m512i
756c5857 7208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7209_mm512_abs_epi32 (__m512i __A)
756c5857 7210{
79fb4764
HJ
7211 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
7212 (__v16si)
7213 _mm512_undefined_epi32 (),
7214 (__mmask16) -1);
756c5857
AI
7215}
7216
79fb4764 7217extern __inline __m512i
756c5857 7218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7219_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
756c5857 7220{
79fb4764
HJ
7221 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
7222 (__v16si) __W,
7223 (__mmask16) __U);
756c5857
AI
7224}
7225
79fb4764 7226extern __inline __m512i
756c5857 7227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7228_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
756c5857 7229{
79fb4764
HJ
7230 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
7231 (__v16si)
7232 _mm512_setzero_si512 (),
7233 (__mmask16) __U);
756c5857 7234}
756c5857 7235
79fb4764 7236extern __inline __m512
756c5857 7237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7238_mm512_broadcastss_ps (__m128 __A)
756c5857 7239{
79fb4764
HJ
7240 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
7241 (__v16sf)
7242 _mm512_undefined_ps (),
7243 (__mmask16) -1);
756c5857
AI
7244}
7245
79fb4764 7246extern __inline __m512
756c5857 7247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7248_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
756c5857 7249{
79fb4764
HJ
7250 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
7251 (__v16sf) __O, __M);
756c5857
AI
7252}
7253
79fb4764 7254extern __inline __m512
756c5857 7255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7256_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
756c5857 7257{
79fb4764
HJ
7258 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
7259 (__v16sf)
7260 _mm512_setzero_ps (),
7261 __M);
756c5857
AI
7262}
7263
7264extern __inline __m512d
7265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7266_mm512_broadcastsd_pd (__m128d __A)
756c5857 7267{
79fb4764 7268 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
756c5857 7269 (__v8df)
0b192937 7270 _mm512_undefined_pd (),
756c5857
AI
7271 (__mmask8) -1);
7272}
7273
7274extern __inline __m512d
7275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7276_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
756c5857 7277{
79fb4764
HJ
7278 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
7279 (__v8df) __O, __M);
756c5857
AI
7280}
7281
7282extern __inline __m512d
7283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7284_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
756c5857 7285{
79fb4764 7286 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
756c5857
AI
7287 (__v8df)
7288 _mm512_setzero_pd (),
79fb4764 7289 __M);
756c5857 7290}
756c5857 7291
79fb4764
HJ
7292extern __inline __m512i
7293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7294_mm512_broadcastd_epi32 (__m128i __A)
7295{
7296 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
7297 (__v16si)
7298 _mm512_undefined_epi32 (),
7299 (__mmask16) -1);
7300}
756c5857 7301
79fb4764
HJ
7302extern __inline __m512i
7303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7304_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
7305{
7306 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
7307 (__v16si) __O, __M);
7308}
756c5857 7309
79fb4764
HJ
7310extern __inline __m512i
7311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7312_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
7313{
7314 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
7315 (__v16si)
7316 _mm512_setzero_si512 (),
7317 __M);
7318}
756c5857 7319
79fb4764
HJ
7320extern __inline __m512i
7321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7322_mm512_set1_epi32 (int __A)
7323{
7324 return (__m512i)(__v16si)
7325 { __A, __A, __A, __A, __A, __A, __A, __A,
7326 __A, __A, __A, __A, __A, __A, __A, __A };
7327}
756c5857
AI
7328
7329extern __inline __m512i
7330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7331_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
756c5857 7332{
79fb4764
HJ
7333 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
7334 __M);
756c5857
AI
7335}
7336
7337extern __inline __m512i
7338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7339_mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
756c5857 7340{
79fb4764
HJ
7341 return (__m512i)
7342 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
7343 (__v16si) _mm512_setzero_si512 (),
7344 __M);
756c5857
AI
7345}
7346
7347extern __inline __m512i
7348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7349_mm512_broadcastq_epi64 (__m128i __A)
756c5857 7350{
79fb4764
HJ
7351 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
7352 (__v8di)
7353 _mm512_undefined_epi32 (),
7354 (__mmask8) -1);
756c5857
AI
7355}
7356
7357extern __inline __m512i
7358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7359_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
756c5857 7360{
79fb4764
HJ
7361 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
7362 (__v8di) __O, __M);
756c5857
AI
7363}
7364
7365extern __inline __m512i
7366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7367_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
756c5857 7368{
79fb4764
HJ
7369 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
7370 (__v8di)
7371 _mm512_setzero_si512 (),
7372 __M);
756c5857
AI
7373}
7374
7375extern __inline __m512i
7376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7377_mm512_set1_epi64 (long long __A)
756c5857 7378{
79fb4764 7379 return (__m512i)(__v8di) { __A, __A, __A, __A, __A, __A, __A, __A };
756c5857
AI
7380}
7381
79fb4764 7382extern __inline __m512i
756c5857 7383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7384_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
756c5857 7385{
79fb4764
HJ
7386 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
7387 __M);
756c5857
AI
7388}
7389
79fb4764 7390extern __inline __m512i
756c5857 7391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7392_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
756c5857 7393{
79fb4764
HJ
7394 return (__m512i)
7395 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
7396 (__v8di) _mm512_setzero_si512 (),
7397 __M);
756c5857
AI
7398}
7399
79fb4764 7400extern __inline __m512
756c5857 7401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7402_mm512_broadcast_f32x4 (__m128 __A)
756c5857 7403{
79fb4764
HJ
7404 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7405 (__v16sf)
7406 _mm512_undefined_ps (),
7407 (__mmask16) -1);
756c5857
AI
7408}
7409
7410extern __inline __m512
7411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7412_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
756c5857 7413{
79fb4764
HJ
7414 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7415 (__v16sf) __O,
7416 __M);
756c5857
AI
7417}
7418
7419extern __inline __m512
7420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7421_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
756c5857 7422{
79fb4764
HJ
7423 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7424 (__v16sf)
7425 _mm512_setzero_ps (),
7426 __M);
756c5857
AI
7427}
7428
79fb4764 7429extern __inline __m512i
756c5857 7430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7431_mm512_broadcast_i32x4 (__m128i __A)
756c5857 7432{
79fb4764
HJ
7433 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7434 (__v16si)
7435 _mm512_undefined_epi32 (),
7436 (__mmask16) -1);
756c5857
AI
7437}
7438
79fb4764 7439extern __inline __m512i
756c5857 7440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7441_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
756c5857 7442{
79fb4764
HJ
7443 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7444 (__v16si) __O,
7445 __M);
756c5857
AI
7446}
7447
79fb4764 7448extern __inline __m512i
756c5857 7449__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7450_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
756c5857 7451{
79fb4764
HJ
7452 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7453 (__v16si)
7454 _mm512_setzero_si512 (),
7455 __M);
756c5857
AI
7456}
7457
79fb4764 7458extern __inline __m512d
756c5857 7459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7460_mm512_broadcast_f64x4 (__m256d __A)
756c5857 7461{
79fb4764
HJ
7462 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7463 (__v8df)
7464 _mm512_undefined_pd (),
7465 (__mmask8) -1);
756c5857
AI
7466}
7467
7468extern __inline __m512d
7469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7470_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
756c5857 7471{
79fb4764
HJ
7472 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7473 (__v8df) __O,
7474 __M);
756c5857
AI
7475}
7476
7477extern __inline __m512d
7478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7479_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
756c5857 7480{
79fb4764
HJ
7481 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7482 (__v8df)
7483 _mm512_setzero_pd (),
7484 __M);
756c5857
AI
7485}
7486
79fb4764 7487extern __inline __m512i
756c5857 7488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7489_mm512_broadcast_i64x4 (__m256i __A)
756c5857 7490{
79fb4764
HJ
7491 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7492 (__v8di)
7493 _mm512_undefined_epi32 (),
7494 (__mmask8) -1);
756c5857
AI
7495}
7496
79fb4764 7497extern __inline __m512i
756c5857 7498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7499_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
756c5857 7500{
79fb4764
HJ
7501 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7502 (__v8di) __O,
7503 __M);
756c5857
AI
7504}
7505
79fb4764 7506extern __inline __m512i
756c5857 7507__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7508_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
756c5857 7509{
79fb4764
HJ
7510 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7511 (__v8di)
7512 _mm512_setzero_si512 (),
7513 __M);
756c5857
AI
7514}
7515
79fb4764
HJ
/* Four-letter permutation selectors.  Each letter A..D stands for the
   2-bit value 0..3; the first letter occupies bits 7:6 and the last
   letter bits 1:0, so _MM_PERM_AAAA is 0x00 and _MM_PERM_DDDD is 0xFF.
   Every row below starts a group of four consecutive values: only the
   first enumerator is given explicitly, the following three take the
   next values implicitly.  */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB, _MM_PERM_AAAC, _MM_PERM_AAAD,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB, _MM_PERM_AABC, _MM_PERM_AABD,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB, _MM_PERM_AACC, _MM_PERM_AACD,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB, _MM_PERM_AADC, _MM_PERM_AADD,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB, _MM_PERM_ABAC, _MM_PERM_ABAD,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB, _MM_PERM_ABBC, _MM_PERM_ABBD,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB, _MM_PERM_ABCC, _MM_PERM_ABCD,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB, _MM_PERM_ABDC, _MM_PERM_ABDD,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB, _MM_PERM_ACAC, _MM_PERM_ACAD,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB, _MM_PERM_ACBC, _MM_PERM_ACBD,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB, _MM_PERM_ACCC, _MM_PERM_ACCD,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB, _MM_PERM_ACDC, _MM_PERM_ACDD,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB, _MM_PERM_ADAC, _MM_PERM_ADAD,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB, _MM_PERM_ADBC, _MM_PERM_ADBD,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB, _MM_PERM_ADCC, _MM_PERM_ADCD,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB, _MM_PERM_ADDC, _MM_PERM_ADDD,

  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB, _MM_PERM_BAAC, _MM_PERM_BAAD,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB, _MM_PERM_BABC, _MM_PERM_BABD,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB, _MM_PERM_BACC, _MM_PERM_BACD,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB, _MM_PERM_BADC, _MM_PERM_BADD,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB, _MM_PERM_BBAC, _MM_PERM_BBAD,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB, _MM_PERM_BBBC, _MM_PERM_BBBD,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB, _MM_PERM_BBCC, _MM_PERM_BBCD,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB, _MM_PERM_BBDC, _MM_PERM_BBDD,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB, _MM_PERM_BCAC, _MM_PERM_BCAD,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB, _MM_PERM_BCBC, _MM_PERM_BCBD,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB, _MM_PERM_BCCC, _MM_PERM_BCCD,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB, _MM_PERM_BCDC, _MM_PERM_BCDD,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB, _MM_PERM_BDAC, _MM_PERM_BDAD,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB, _MM_PERM_BDBC, _MM_PERM_BDBD,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB, _MM_PERM_BDCC, _MM_PERM_BDCD,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB, _MM_PERM_BDDC, _MM_PERM_BDDD,

  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB, _MM_PERM_CAAC, _MM_PERM_CAAD,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB, _MM_PERM_CABC, _MM_PERM_CABD,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB, _MM_PERM_CACC, _MM_PERM_CACD,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB, _MM_PERM_CADC, _MM_PERM_CADD,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB, _MM_PERM_CBAC, _MM_PERM_CBAD,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB, _MM_PERM_CBBC, _MM_PERM_CBBD,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB, _MM_PERM_CBCC, _MM_PERM_CBCD,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB, _MM_PERM_CBDC, _MM_PERM_CBDD,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB, _MM_PERM_CCAC, _MM_PERM_CCAD,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB, _MM_PERM_CCBC, _MM_PERM_CCBD,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB, _MM_PERM_CCCC, _MM_PERM_CCCD,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB, _MM_PERM_CCDC, _MM_PERM_CCDD,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB, _MM_PERM_CDAC, _MM_PERM_CDAD,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB, _MM_PERM_CDBC, _MM_PERM_CDBD,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB, _MM_PERM_CDCC, _MM_PERM_CDCD,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB, _MM_PERM_CDDC, _MM_PERM_CDDD,

  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB, _MM_PERM_DAAC, _MM_PERM_DAAD,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB, _MM_PERM_DABC, _MM_PERM_DABD,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB, _MM_PERM_DACC, _MM_PERM_DACD,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB, _MM_PERM_DADC, _MM_PERM_DADD,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB, _MM_PERM_DBAC, _MM_PERM_DBAD,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB, _MM_PERM_DBBC, _MM_PERM_DBBD,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB, _MM_PERM_DBCC, _MM_PERM_DBCD,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB, _MM_PERM_DBDC, _MM_PERM_DBDD,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB, _MM_PERM_DCAC, _MM_PERM_DCAD,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB, _MM_PERM_DCBC, _MM_PERM_DCBD,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB, _MM_PERM_DCCC, _MM_PERM_DCCD,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB, _MM_PERM_DCDC, _MM_PERM_DCDD,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB, _MM_PERM_DDAC, _MM_PERM_DDAD,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB, _MM_PERM_DDBC, _MM_PERM_DDBD,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB, _MM_PERM_DDCC, _MM_PERM_DDCD,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB, _MM_PERM_DDDC, _MM_PERM_DDDD
} _MM_PERM_ENUM;
7605
7606#ifdef __OPTIMIZE__
7607extern __inline __m512i
756c5857 7608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7609_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
756c5857 7610{
79fb4764
HJ
7611 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
7612 __mask,
7613 (__v16si)
7614 _mm512_undefined_epi32 (),
7615 (__mmask16) -1);
756c5857
AI
7616}
7617
79fb4764 7618extern __inline __m512i
756c5857 7619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7620_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7621 _MM_PERM_ENUM __mask)
756c5857 7622{
79fb4764
HJ
7623 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
7624 __mask,
7625 (__v16si) __W,
7626 (__mmask16) __U);
756c5857
AI
7627}
7628
79fb4764 7629extern __inline __m512i
756c5857 7630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7631_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
756c5857 7632{
79fb4764
HJ
7633 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
7634 __mask,
7635 (__v16si)
7636 _mm512_setzero_si512 (),
7637 (__mmask16) __U);
756c5857
AI
7638}
7639
79fb4764 7640extern __inline __m512i
756c5857 7641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7642_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
756c5857 7643{
79fb4764
HJ
7644 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
7645 (__v8di) __B, __imm,
7646 (__v8di)
7647 _mm512_undefined_epi32 (),
7648 (__mmask8) -1);
756c5857
AI
7649}
7650
79fb4764 7651extern __inline __m512i
756c5857 7652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7653_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
7654 __m512i __B, const int __imm)
756c5857 7655{
79fb4764
HJ
7656 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
7657 (__v8di) __B, __imm,
7658 (__v8di) __W,
7659 (__mmask8) __U);
756c5857
AI
7660}
7661
79fb4764 7662extern __inline __m512i
756c5857 7663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7664_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
7665 const int __imm)
756c5857 7666{
79fb4764
HJ
7667 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
7668 (__v8di) __B, __imm,
7669 (__v8di)
7670 _mm512_setzero_si512 (),
7671 (__mmask8) __U);
756c5857
AI
7672}
7673
79fb4764 7674extern __inline __m512i
756c5857 7675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7676_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
756c5857 7677{
79fb4764
HJ
7678 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
7679 (__v16si) __B,
7680 __imm,
7681 (__v16si)
7682 _mm512_undefined_epi32 (),
7683 (__mmask16) -1);
756c5857
AI
7684}
7685
79fb4764 7686extern __inline __m512i
756c5857 7687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7688_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
7689 __m512i __B, const int __imm)
756c5857 7690{
79fb4764
HJ
7691 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
7692 (__v16si) __B,
7693 __imm,
7694 (__v16si) __W,
7695 (__mmask16) __U);
756c5857
AI
7696}
7697
79fb4764 7698extern __inline __m512i
756c5857 7699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7700_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
7701 const int __imm)
756c5857 7702{
79fb4764
HJ
7703 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
7704 (__v16si) __B,
7705 __imm,
7706 (__v16si)
7707 _mm512_setzero_si512 (),
7708 (__mmask16) __U);
756c5857
AI
7709}
7710
79fb4764 7711extern __inline __m512d
756c5857 7712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7713_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
756c5857 7714{
79fb4764
HJ
7715 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
7716 (__v8df) __B, __imm,
7717 (__v8df)
7718 _mm512_undefined_pd (),
7719 (__mmask8) -1);
756c5857
AI
7720}
7721
79fb4764 7722extern __inline __m512d
756c5857 7723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7724_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
7725 __m512d __B, const int __imm)
756c5857 7726{
79fb4764
HJ
7727 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
7728 (__v8df) __B, __imm,
7729 (__v8df) __W,
7730 (__mmask8) __U);
756c5857
AI
7731}
7732
79fb4764 7733extern __inline __m512d
756c5857 7734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7735_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
7736 const int __imm)
756c5857 7737{
79fb4764
HJ
7738 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
7739 (__v8df) __B, __imm,
7740 (__v8df)
7741 _mm512_setzero_pd (),
7742 (__mmask8) __U);
756c5857
AI
7743}
7744
7745extern __inline __m512
7746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7747_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
756c5857 7748{
79fb4764
HJ
7749 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
7750 (__v16sf) __B, __imm,
7751 (__v16sf)
7752 _mm512_undefined_ps (),
7753 (__mmask16) -1);
756c5857
AI
7754}
7755
7756extern __inline __m512
7757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7758_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
7759 __m512 __B, const int __imm)
756c5857 7760{
79fb4764
HJ
7761 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
7762 (__v16sf) __B, __imm,
7763 (__v16sf) __W,
7764 (__mmask16) __U);
756c5857
AI
7765}
7766
7767extern __inline __m512
7768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
7769_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
7770 const int __imm)
756c5857 7771{
79fb4764
HJ
7772 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
7773 (__v16sf) __B, __imm,
7774 (__v16sf)
7775 _mm512_setzero_ps (),
7776 (__mmask16) __U);
756c5857
AI
7777}
7778
79fb4764
HJ
7779#else
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_pshufd512_mask on X with control C, an undefined
   third operand and an all-ones __mmask16.  */
#define _mm512_shuffle_epi32(X, C) \
  ((__m512i) \
   __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), \
                                  (int)(C), \
                                  (__v16si)(__m512i)_mm512_undefined_epi32 (), \
                                  (__mmask16)-1))
7784
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_pshufd512_mask on X with control C, W as the third
   operand and U as the mask operand.  */
#define _mm512_mask_shuffle_epi32(W, U, X, C) \
  ((__m512i) \
   __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), \
                                  (int)(C), \
                                  (__v16si)(__m512i)(W), \
                                  (__mmask16)(U)))
7789
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_pshufd512_mask on X with control C, a zero vector as
   the third operand and U as the mask operand.  */
#define _mm512_maskz_shuffle_epi32(U, X, C) \
  ((__m512i) \
   __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), \
                                  (int)(C), \
                                  (__v16si)(__m512i)_mm512_setzero_si512 (), \
                                  (__mmask16)(U)))
7794
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_i64x2_mask on X and Y with immediate C, an
   undefined fourth operand and an all-ones __mmask8.  */
#define _mm512_shuffle_i64x2(X, Y, C) \
  ((__m512i) \
   __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
                                   (__v8di)(__m512i)(Y), \
                                   (int)(C), \
                                   (__v8di)(__m512i)_mm512_undefined_epi32 (), \
                                   (__mmask8)-1))
7800
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_i64x2_mask on X and Y with immediate C, W as the
   fourth operand and U as the mask operand.  */
#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
                                   (__v8di)(__m512i)(Y), \
                                   (int)(C), \
                                   (__v8di)(__m512i)(W), \
                                   (__mmask8)(U)))
7806
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_i64x2_mask on X and Y with immediate C, a zero
   vector as the fourth operand and U as the mask operand.  */
#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
                                   (__v8di)(__m512i)(Y), \
                                   (int)(C), \
                                   (__v8di)(__m512i)_mm512_setzero_si512 (), \
                                   (__mmask8)(U)))
7812
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_i32x4_mask on X and Y with immediate C, an
   undefined fourth operand and an all-ones __mmask16.  */
#define _mm512_shuffle_i32x4(X, Y, C) \
  ((__m512i) \
   __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
                                   (__v16si)(__m512i)(Y), \
                                   (int)(C), \
                                   (__v16si)(__m512i)_mm512_undefined_epi32 (), \
                                   (__mmask16)-1))
7818
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_i32x4_mask on X and Y with immediate C, W as the
   fourth operand and U as the mask operand.  */
#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
                                   (__v16si)(__m512i)(Y), \
                                   (int)(C), \
                                   (__v16si)(__m512i)(W), \
                                   (__mmask16)(U)))
7824
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_i32x4_mask on X and Y with immediate C, a zero
   vector as the fourth operand and U as the mask operand.  */
#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m512i) \
   __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
                                   (__v16si)(__m512i)(Y), \
                                   (int)(C), \
                                   (__v16si)(__m512i)_mm512_setzero_si512 (), \
                                   (__mmask16)(U)))
7830
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_f64x2_mask on X and Y with immediate C, an
   undefined fourth operand and an all-ones __mmask8.  */
#define _mm512_shuffle_f64x2(X, Y, C) \
  ((__m512d) \
   __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
                                   (__v8df)(__m512d)(Y), \
                                   (int)(C), \
                                   (__v8df)(__m512d)_mm512_undefined_pd(), \
                                   (__mmask8)-1))
7836
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_f64x2_mask on X and Y with immediate C, W as the
   fourth operand and U as the mask operand.  */
#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
  ((__m512d) \
   __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
                                   (__v8df)(__m512d)(Y), \
                                   (int)(C), \
                                   (__v8df)(__m512d)(W), \
                                   (__mmask8)(U)))
7842
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_f64x2_mask on X and Y with immediate C, a zero
   vector as the fourth operand and U as the mask operand.  */
#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
  ((__m512d) \
   __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
                                   (__v8df)(__m512d)(Y), \
                                   (int)(C), \
                                   (__v8df)(__m512d)_mm512_setzero_pd(), \
                                   (__mmask8)(U)))
7848
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_f32x4_mask on X and Y with immediate C, an
   undefined fourth operand and an all-ones __mmask16.  */
#define _mm512_shuffle_f32x4(X, Y, C) \
  ((__m512) \
   __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
                                   (__v16sf)(__m512)(Y), \
                                   (int)(C), \
                                   (__v16sf)(__m512)_mm512_undefined_ps(), \
                                   (__mmask16)-1))
7854
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_f32x4_mask on X and Y with immediate C, W as the
   fourth operand and U as the mask operand.  */
#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m512) \
   __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
                                   (__v16sf)(__m512)(Y), \
                                   (int)(C), \
                                   (__v16sf)(__m512)(W), \
                                   (__mmask16)(U)))
7860
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_shuf_f32x4_mask on X and Y with immediate C, a zero
   vector as the fourth operand and U as the mask operand.  */
#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m512) \
   __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
                                   (__v16sf)(__m512)(Y), \
                                   (int)(C), \
                                   (__v16sf)(__m512)_mm512_setzero_ps(), \
                                   (__mmask16)(U)))
7866#endif
7867
7868extern __inline __m512i
756c5857 7869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7870_mm512_rolv_epi32 (__m512i __A, __m512i __B)
756c5857 7871{
79fb4764
HJ
7872 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
7873 (__v16si) __B,
7874 (__v16si)
7875 _mm512_undefined_epi32 (),
7876 (__mmask16) -1);
756c5857
AI
7877}
7878
7879extern __inline __m512i
7880__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7881_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
756c5857 7882{
79fb4764
HJ
7883 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
7884 (__v16si) __B,
7885 (__v16si) __W,
7886 (__mmask16) __U);
756c5857
AI
7887}
7888
7889extern __inline __m512i
7890__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7891_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 7892{
79fb4764
HJ
7893 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
7894 (__v16si) __B,
7895 (__v16si)
7896 _mm512_setzero_si512 (),
7897 (__mmask16) __U);
756c5857
AI
7898}
7899
7900extern __inline __m512i
7901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7902_mm512_rorv_epi32 (__m512i __A, __m512i __B)
756c5857 7903{
79fb4764
HJ
7904 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
7905 (__v16si) __B,
7906 (__v16si)
7907 _mm512_undefined_epi32 (),
7908 (__mmask16) -1);
756c5857
AI
7909}
7910
7911extern __inline __m512i
7912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7913_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
756c5857 7914{
79fb4764
HJ
7915 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
7916 (__v16si) __B,
7917 (__v16si) __W,
7918 (__mmask16) __U);
756c5857
AI
7919}
7920
7921extern __inline __m512i
7922__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7923_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 7924{
79fb4764
HJ
7925 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
7926 (__v16si) __B,
7927 (__v16si)
7928 _mm512_setzero_si512 (),
7929 (__mmask16) __U);
756c5857
AI
7930}
7931
7932extern __inline __m512i
7933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7934_mm512_rolv_epi64 (__m512i __A, __m512i __B)
756c5857 7935{
79fb4764
HJ
7936 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
7937 (__v8di) __B,
7938 (__v8di)
7939 _mm512_undefined_epi32 (),
7940 (__mmask8) -1);
756c5857
AI
7941}
7942
7943extern __inline __m512i
7944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7945_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857 7946{
79fb4764
HJ
7947 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
7948 (__v8di) __B,
7949 (__v8di) __W,
7950 (__mmask8) __U);
756c5857
AI
7951}
7952
7953extern __inline __m512i
7954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7955_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 7956{
79fb4764
HJ
7957 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
7958 (__v8di) __B,
7959 (__v8di)
7960 _mm512_setzero_si512 (),
7961 (__mmask8) __U);
756c5857
AI
7962}
7963
7964extern __inline __m512i
7965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7966_mm512_rorv_epi64 (__m512i __A, __m512i __B)
756c5857 7967{
79fb4764
HJ
7968 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
7969 (__v8di) __B,
7970 (__v8di)
7971 _mm512_undefined_epi32 (),
7972 (__mmask8) -1);
756c5857
AI
7973}
7974
7975extern __inline __m512i
7976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7977_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857 7978{
79fb4764
HJ
7979 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
7980 (__v8di) __B,
7981 (__v8di) __W,
7982 (__mmask8) __U);
756c5857
AI
7983}
7984
7985extern __inline __m512i
7986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7987_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 7988{
79fb4764
HJ
7989 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
7990 (__v8di) __B,
7991 (__v8di)
7992 _mm512_setzero_si512 (),
7993 (__mmask8) __U);
756c5857
AI
7994}
7995
79fb4764
HJ
7996#ifdef __OPTIMIZE__
7997extern __inline __m256i
756c5857 7998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 7999_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
756c5857 8000{
79fb4764
HJ
8001 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
8002 (__v8si)
8003 _mm256_undefined_si256 (),
8004 (__mmask8) -1, __R);
756c5857
AI
8005}
8006
79fb4764 8007extern __inline __m256i
756c5857 8008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8009_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
8010 const int __R)
756c5857 8011{
79fb4764
HJ
8012 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
8013 (__v8si) __W,
8014 (__mmask8) __U, __R);
8015}
8016
8017extern __inline __m256i
8018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8019_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
8020{
8021 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
8022 (__v8si)
8023 _mm256_setzero_si256 (),
8024 (__mmask8) __U, __R);
756c5857
AI
8025}
8026
79fb4764 8027extern __inline __m256i
756c5857 8028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8029_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
756c5857 8030{
79fb4764
HJ
8031 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
8032 (__v8si)
8033 _mm256_undefined_si256 (),
8034 (__mmask8) -1, __R);
756c5857
AI
8035}
8036
79fb4764 8037extern __inline __m256i
756c5857 8038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8039_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
8040 const int __R)
756c5857 8041{
79fb4764
HJ
8042 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
8043 (__v8si) __W,
8044 (__mmask8) __U, __R);
756c5857
AI
8045}
8046
79fb4764 8047extern __inline __m256i
756c5857 8048__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8049_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
756c5857 8050{
79fb4764
HJ
8051 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
8052 (__v8si)
8053 _mm256_setzero_si256 (),
8054 (__mmask8) __U, __R);
756c5857 8055}
79fb4764
HJ
8056#else
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttpd2dq512_mask with an undefined __v8si vector and
   a mask of -1; A and B are passed through uncast.  */
#define _mm512_cvtt_roundpd_epi32(A, B) \
  ((__m256i) \
   __builtin_ia32_cvttpd2dq512_mask (A, \
                                     (__v8si)_mm256_undefined_si256(), \
                                     -1, B))
756c5857 8059
79fb4764
HJ
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttpd2dq512_mask with W cast to __v8si; A, U and B
   are passed through uncast.  */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
  ((__m256i) \
   __builtin_ia32_cvttpd2dq512_mask (A, (__v8si)(W), U, B))
8062
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttpd2dq512_mask with a zero __v8si vector; U, A and
   B are passed through uncast.  */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
  ((__m256i) \
   __builtin_ia32_cvttpd2dq512_mask (A, \
                                     (__v8si)_mm256_setzero_si256(), \
                                     U, B))
8065
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttpd2udq512_mask with an undefined __v8si vector
   and a mask of -1; A and B are passed through uncast.  */
#define _mm512_cvtt_roundpd_epu32(A, B) \
  ((__m256i) \
   __builtin_ia32_cvttpd2udq512_mask (A, \
                                      (__v8si)_mm256_undefined_si256(), \
                                      -1, B))
8068
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttpd2udq512_mask with W cast to __v8si; A, U and B
   are passed through uncast.  */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
  ((__m256i) \
   __builtin_ia32_cvttpd2udq512_mask (A, (__v8si)(W), U, B))
8071
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttpd2udq512_mask with a zero __v8si vector; U, A
   and B are passed through uncast.  */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
  ((__m256i) \
   __builtin_ia32_cvttpd2udq512_mask (A, \
                                      (__v8si)_mm256_setzero_si256(), \
                                      U, B))
8074#endif
8075
8076#ifdef __OPTIMIZE__
8077extern __inline __m256i
756c5857 8078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8079_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
756c5857 8080{
79fb4764
HJ
8081 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
8082 (__v8si)
8083 _mm256_undefined_si256 (),
8084 (__mmask8) -1, __R);
756c5857
AI
8085}
8086
79fb4764 8087extern __inline __m256i
756c5857 8088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8089_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
8090 const int __R)
756c5857 8091{
79fb4764
HJ
8092 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
8093 (__v8si) __W,
8094 (__mmask8) __U, __R);
756c5857
AI
8095}
8096
79fb4764 8097extern __inline __m256i
756c5857 8098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8099_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
756c5857 8100{
79fb4764
HJ
8101 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
8102 (__v8si)
8103 _mm256_setzero_si256 (),
8104 (__mmask8) __U, __R);
756c5857
AI
8105}
8106
79fb4764 8107extern __inline __m256i
756c5857 8108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8109_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
756c5857 8110{
79fb4764
HJ
8111 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
8112 (__v8si)
8113 _mm256_undefined_si256 (),
8114 (__mmask8) -1, __R);
756c5857
AI
8115}
8116
79fb4764 8117extern __inline __m256i
756c5857 8118__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8119_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
8120 const int __R)
756c5857 8121{
79fb4764
HJ
8122 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
8123 (__v8si) __W,
8124 (__mmask8) __U, __R);
756c5857
AI
8125}
8126
79fb4764 8127extern __inline __m256i
756c5857 8128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8129_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
756c5857 8130{
79fb4764
HJ
8131 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
8132 (__v8si)
8133 _mm256_setzero_si256 (),
8134 (__mmask8) __U, __R);
756c5857 8135}
79fb4764
HJ
8136#else
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtpd2dq512_mask with an undefined __v8si vector and
   a mask of -1; A and B are passed through uncast.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
  ((__m256i) \
   __builtin_ia32_cvtpd2dq512_mask (A, \
                                    (__v8si)_mm256_undefined_si256(), \
                                    -1, B))
8139
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtpd2dq512_mask with W cast to __v8si; A, U and B
   are passed through uncast.  */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
  ((__m256i) \
   __builtin_ia32_cvtpd2dq512_mask (A, (__v8si)(W), U, B))
8142
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtpd2dq512_mask with a zero __v8si vector; U, A and
   B are passed through uncast.  */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
  ((__m256i) \
   __builtin_ia32_cvtpd2dq512_mask (A, \
                                    (__v8si)_mm256_setzero_si256(), \
                                    U, B))
8145
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtpd2udq512_mask with an undefined __v8si vector and
   a mask of -1; A and B are passed through uncast.  */
#define _mm512_cvt_roundpd_epu32(A, B) \
  ((__m256i) \
   __builtin_ia32_cvtpd2udq512_mask (A, \
                                     (__v8si)_mm256_undefined_si256(), \
                                     -1, B))
8148
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtpd2udq512_mask with W cast to __v8si; A, U and B
   are passed through uncast.  */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
  ((__m256i) \
   __builtin_ia32_cvtpd2udq512_mask (A, (__v8si)(W), U, B))
8151
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtpd2udq512_mask with a zero __v8si vector; U, A and
   B are passed through uncast.  */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
  ((__m256i) \
   __builtin_ia32_cvtpd2udq512_mask (A, \
                                     (__v8si)_mm256_setzero_si256(), \
                                     U, B))
8154#endif
756c5857 8155
79fb4764 8156#ifdef __OPTIMIZE__
756c5857
AI
8157extern __inline __m512i
8158__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8159_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
756c5857 8160{
79fb4764
HJ
8161 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
8162 (__v16si)
8163 _mm512_undefined_epi32 (),
8164 (__mmask16) -1, __R);
756c5857
AI
8165}
8166
8167extern __inline __m512i
8168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8169_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
8170 const int __R)
756c5857 8171{
79fb4764
HJ
8172 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
8173 (__v16si) __W,
8174 (__mmask16) __U, __R);
756c5857
AI
8175}
8176
8177extern __inline __m512i
8178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8179_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
756c5857 8180{
79fb4764
HJ
8181 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
8182 (__v16si)
8183 _mm512_setzero_si512 (),
8184 (__mmask16) __U, __R);
756c5857
AI
8185}
8186
8187extern __inline __m512i
8188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8189_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
756c5857 8190{
79fb4764
HJ
8191 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
8192 (__v16si)
8193 _mm512_undefined_epi32 (),
8194 (__mmask16) -1, __R);
8195}
756c5857
AI
8196
8197extern __inline __m512i
8198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8199_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
8200 const int __R)
756c5857 8201{
79fb4764
HJ
8202 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
8203 (__v16si) __W,
8204 (__mmask16) __U, __R);
756c5857
AI
8205}
8206
8207extern __inline __m512i
8208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8209_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
756c5857 8210{
79fb4764
HJ
8211 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
8212 (__v16si)
8213 _mm512_setzero_si512 (),
8214 (__mmask16) __U, __R);
756c5857 8215}
79fb4764
HJ
8216#else
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttps2dq512_mask with an undefined __v16si vector
   and a mask of -1; A and B are passed through uncast.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
  ((__m512i) \
   __builtin_ia32_cvttps2dq512_mask (A, \
                                     (__v16si)_mm512_undefined_epi32 (), \
                                     -1, B))
8219
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttps2dq512_mask with W cast to __v16si; A, U and B
   are passed through uncast.  */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_cvttps2dq512_mask (A, (__v16si)(W), U, B))
8222
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttps2dq512_mask with a zero __v16si vector; U, A
   and B are passed through uncast.  */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
  ((__m512i) \
   __builtin_ia32_cvttps2dq512_mask (A, \
                                     (__v16si)_mm512_setzero_si512 (), \
                                     U, B))
8225
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttps2udq512_mask with an undefined __v16si vector
   and a mask of -1; A and B are passed through uncast.  */
#define _mm512_cvtt_roundps_epu32(A, B) \
  ((__m512i) \
   __builtin_ia32_cvttps2udq512_mask (A, \
                                      (__v16si)_mm512_undefined_epi32 (), \
                                      -1, B))
8228
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttps2udq512_mask with W cast to __v16si; A, U and B
   are passed through uncast.  */
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_cvttps2udq512_mask (A, (__v16si)(W), U, B))
756c5857 8231
79fb4764
HJ
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvttps2udq512_mask with a zero __v16si vector; U, A
   and B are passed through uncast.  */
#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
  ((__m512i) \
   __builtin_ia32_cvttps2udq512_mask (A, \
                                      (__v16si)_mm512_setzero_si512 (), \
                                      U, B))
8234#endif
8235
8236#ifdef __OPTIMIZE__
756c5857
AI
8237extern __inline __m512i
8238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8239_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
756c5857 8240{
79fb4764
HJ
8241 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
8242 (__v16si)
8243 _mm512_undefined_epi32 (),
8244 (__mmask16) -1, __R);
756c5857
AI
8245}
8246
8247extern __inline __m512i
8248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8249_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
8250 const int __R)
756c5857 8251{
79fb4764
HJ
8252 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
8253 (__v16si) __W,
8254 (__mmask16) __U, __R);
756c5857
AI
8255}
8256
8257extern __inline __m512i
8258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8259_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
756c5857 8260{
79fb4764
HJ
8261 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
8262 (__v16si)
8263 _mm512_setzero_si512 (),
8264 (__mmask16) __U, __R);
756c5857
AI
8265}
8266
8267extern __inline __m512i
8268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8269_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
756c5857 8270{
79fb4764
HJ
8271 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
8272 (__v16si)
8273 _mm512_undefined_epi32 (),
8274 (__mmask16) -1, __R);
756c5857
AI
8275}
8276
8277extern __inline __m512i
8278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8279_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
8280 const int __R)
756c5857 8281{
79fb4764
HJ
8282 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
8283 (__v16si) __W,
8284 (__mmask16) __U, __R);
756c5857
AI
8285}
8286
8287extern __inline __m512i
8288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8289_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
756c5857 8290{
79fb4764
HJ
8291 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
8292 (__v16si)
8293 _mm512_setzero_si512 (),
8294 (__mmask16) __U, __R);
756c5857 8295}
79fb4764
HJ
8296#else
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtps2dq512_mask with an undefined __v16si vector and
   a mask of -1; A and B are passed through uncast.  */
#define _mm512_cvt_roundps_epi32(A, B) \
  ((__m512i) \
   __builtin_ia32_cvtps2dq512_mask (A, \
                                    (__v16si)_mm512_undefined_epi32 (), \
                                    -1, B))
756c5857 8299
79fb4764
HJ
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtps2dq512_mask with W cast to __v16si; A, U and B
   are passed through uncast.  */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_cvtps2dq512_mask (A, (__v16si)(W), U, B))
8302
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtps2dq512_mask with a zero __v16si vector; U, A and
   B are passed through uncast.  */
#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
  ((__m512i) \
   __builtin_ia32_cvtps2dq512_mask (A, \
                                    (__v16si)_mm512_setzero_si512 (), \
                                    U, B))
8305
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtps2udq512_mask with an undefined __v16si vector
   and a mask of -1; A and B are passed through uncast.  */
#define _mm512_cvt_roundps_epu32(A, B) \
  ((__m512i) \
   __builtin_ia32_cvtps2udq512_mask (A, \
                                     (__v16si)_mm512_undefined_epi32 (), \
                                     -1, B))
8308
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtps2udq512_mask with W cast to __v16si; A, U and B
   are passed through uncast.  */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_cvtps2udq512_mask (A, (__v16si)(W), U, B))
8311
/* Macro form (#else branch of the __OPTIMIZE__ check): expands to
   __builtin_ia32_cvtps2udq512_mask with a zero __v16si vector; U, A
   and B are passed through uncast.  */
#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
  ((__m512i) \
   __builtin_ia32_cvtps2udq512_mask (A, \
                                     (__v16si)_mm512_setzero_si512 (), \
                                     U, B))
8314#endif
8315
8316extern __inline __m128i
756c5857 8317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8318_mm512_cvtepi32_epi8 (__m512i __A)
756c5857 8319{
79fb4764
HJ
8320 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
8321 (__v16qi)
8322 _mm_undefined_si128 (),
756c5857
AI
8323 (__mmask16) -1);
8324}
8325
79fb4764 8326extern __inline void
756c5857 8327__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8328_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
756c5857 8329{
79fb4764 8330 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
756c5857
AI
8331}
8332
79fb4764 8333extern __inline __m128i
756c5857 8334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8335_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
756c5857 8336{
79fb4764
HJ
8337 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
8338 (__v16qi) __O, __M);
756c5857
AI
8339}
8340
79fb4764 8341extern __inline __m128i
756c5857 8342__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8343_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
756c5857 8344{
79fb4764
HJ
8345 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
8346 (__v16qi)
8347 _mm_setzero_si128 (),
8348 __M);
756c5857
AI
8349}
8350
79fb4764 8351extern __inline __m128i
756c5857 8352__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8353_mm512_cvtsepi32_epi8 (__m512i __A)
756c5857 8354{
79fb4764
HJ
8355 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
8356 (__v16qi)
8357 _mm_undefined_si128 (),
8358 (__mmask16) -1);
756c5857
AI
8359}
8360
79fb4764 8361extern __inline void
756c5857 8362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8363_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
756c5857 8364{
79fb4764 8365 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
756c5857
AI
8366}
8367
79fb4764 8368extern __inline __m128i
756c5857 8369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8370_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
756c5857 8371{
79fb4764
HJ
8372 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
8373 (__v16qi) __O, __M);
756c5857
AI
8374}
8375
79fb4764 8376extern __inline __m128i
756c5857 8377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8378_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
756c5857 8379{
79fb4764
HJ
8380 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
8381 (__v16qi)
8382 _mm_setzero_si128 (),
8383 __M);
756c5857
AI
8384}
8385
79fb4764 8386extern __inline __m128i
756c5857 8387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8388_mm512_cvtusepi32_epi8 (__m512i __A)
756c5857 8389{
79fb4764
HJ
8390 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
8391 (__v16qi)
8392 _mm_undefined_si128 (),
8393 (__mmask16) -1);
756c5857
AI
8394}
8395
79fb4764 8396extern __inline void
756c5857 8397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8398_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
756c5857 8399{
79fb4764 8400 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
756c5857
AI
8401}
8402
79fb4764 8403extern __inline __m128i
260d3642 8404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8405_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
260d3642 8406{
79fb4764
HJ
8407 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
8408 (__v16qi) __O,
8409 __M);
260d3642
IT
8410}
8411
79fb4764 8412extern __inline __m128i
260d3642 8413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8414_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
260d3642 8415{
79fb4764
HJ
8416 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
8417 (__v16qi)
8418 _mm_setzero_si128 (),
8419 __M);
260d3642
IT
8420}
8421
79fb4764 8422extern __inline __m256i
260d3642 8423__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8424_mm512_cvtepi32_epi16 (__m512i __A)
260d3642 8425{
79fb4764
HJ
8426 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
8427 (__v16hi)
8428 _mm256_undefined_si256 (),
8429 (__mmask16) -1);
260d3642
IT
8430}
8431
79fb4764 8432extern __inline void
260d3642 8433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8434_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
260d3642 8435{
79fb4764 8436 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
260d3642
IT
8437}
8438
79fb4764 8439extern __inline __m256i
dcb2c527 8440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8441_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
dcb2c527 8442{
79fb4764
HJ
8443 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
8444 (__v16hi) __O, __M);
dcb2c527
JJ
8445}
8446
79fb4764 8447extern __inline __m256i
dcb2c527 8448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8449_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
dcb2c527 8450{
79fb4764
HJ
8451 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
8452 (__v16hi)
8453 _mm256_setzero_si256 (),
8454 __M);
dcb2c527
JJ
8455}
8456
79fb4764 8457extern __inline __m256i
dcb2c527 8458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8459_mm512_cvtsepi32_epi16 (__m512i __A)
dcb2c527 8460{
79fb4764
HJ
8461 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
8462 (__v16hi)
8463 _mm256_undefined_si256 (),
8464 (__mmask16) -1);
dcb2c527
JJ
8465}
8466
79fb4764 8467extern __inline void
dcb2c527 8468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8469_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
dcb2c527 8470{
79fb4764 8471 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
dcb2c527
JJ
8472}
8473
79fb4764 8474extern __inline __m256i
756c5857 8475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8476_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
756c5857 8477{
79fb4764
HJ
8478 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
8479 (__v16hi) __O, __M);
756c5857
AI
8480}
8481
79fb4764 8482extern __inline __m256i
756c5857 8483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8484_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
756c5857 8485{
79fb4764
HJ
8486 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
8487 (__v16hi)
8488 _mm256_setzero_si256 (),
8489 __M);
756c5857
AI
8490}
8491
79fb4764 8492extern __inline __m256i
756c5857 8493__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8494_mm512_cvtusepi32_epi16 (__m512i __A)
756c5857 8495{
79fb4764
HJ
8496 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
8497 (__v16hi)
8498 _mm256_undefined_si256 (),
8499 (__mmask16) -1);
756c5857
AI
8500}
8501
79fb4764 8502extern __inline void
756c5857 8503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8504_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
756c5857 8505{
79fb4764 8506 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
756c5857
AI
8507}
8508
79fb4764 8509extern __inline __m256i
756c5857 8510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8511_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
756c5857 8512{
79fb4764
HJ
8513 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
8514 (__v16hi) __O,
8515 __M);
756c5857
AI
8516}
8517
79fb4764 8518extern __inline __m256i
756c5857 8519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8520_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
756c5857 8521{
79fb4764
HJ
8522 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
8523 (__v16hi)
8524 _mm256_setzero_si256 (),
8525 __M);
756c5857
AI
8526}
8527
79fb4764 8528extern __inline __m256i
756c5857 8529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8530_mm512_cvtepi64_epi32 (__m512i __A)
756c5857 8531{
79fb4764
HJ
8532 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
8533 (__v8si)
8534 _mm256_undefined_si256 (),
8535 (__mmask8) -1);
756c5857
AI
8536}
8537
79fb4764 8538extern __inline void
756c5857 8539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8540_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
756c5857 8541{
79fb4764 8542 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
756c5857
AI
8543}
8544
79fb4764 8545extern __inline __m256i
756c5857 8546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8547_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
756c5857 8548{
79fb4764
HJ
8549 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
8550 (__v8si) __O, __M);
756c5857
AI
8551}
8552
79fb4764 8553extern __inline __m256i
756c5857 8554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8555_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
756c5857 8556{
79fb4764
HJ
8557 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
8558 (__v8si)
8559 _mm256_setzero_si256 (),
8560 __M);
756c5857
AI
8561}
8562
79fb4764 8563extern __inline __m256i
756c5857 8564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8565_mm512_cvtsepi64_epi32 (__m512i __A)
756c5857 8566{
79fb4764
HJ
8567 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
8568 (__v8si)
8569 _mm256_undefined_si256 (),
8570 (__mmask8) -1);
756c5857
AI
8571}
8572
79fb4764 8573extern __inline void
756c5857 8574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8575_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
756c5857 8576{
79fb4764 8577 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
756c5857
AI
8578}
8579
79fb4764 8580extern __inline __m256i
756c5857 8581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8582_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
756c5857 8583{
79fb4764
HJ
8584 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
8585 (__v8si) __O, __M);
756c5857
AI
8586}
8587
79fb4764 8588extern __inline __m256i
756c5857 8589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8590_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
756c5857 8591{
79fb4764
HJ
8592 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
8593 (__v8si)
8594 _mm256_setzero_si256 (),
8595 __M);
756c5857
AI
8596}
8597
79fb4764 8598extern __inline __m256i
756c5857 8599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8600_mm512_cvtusepi64_epi32 (__m512i __A)
756c5857 8601{
79fb4764
HJ
8602 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
8603 (__v8si)
8604 _mm256_undefined_si256 (),
8605 (__mmask8) -1);
756c5857
AI
8606}
8607
79fb4764 8608extern __inline void
756c5857 8609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8610_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
756c5857 8611{
79fb4764 8612 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
756c5857
AI
8613}
8614
79fb4764 8615extern __inline __m256i
756c5857 8616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8617_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
756c5857 8618{
79fb4764
HJ
8619 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
8620 (__v8si) __O, __M);
756c5857
AI
8621}
8622
79fb4764 8623extern __inline __m256i
756c5857 8624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8625_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
756c5857 8626{
79fb4764
HJ
8627 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
8628 (__v8si)
8629 _mm256_setzero_si256 (),
8630 __M);
756c5857 8631}
756c5857 8632
79fb4764 8633extern __inline __m128i
756c5857 8634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8635_mm512_cvtepi64_epi16 (__m512i __A)
756c5857 8636{
79fb4764
HJ
8637 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
8638 (__v8hi)
8639 _mm_undefined_si128 (),
8640 (__mmask8) -1);
756c5857
AI
8641}
8642
79fb4764 8643extern __inline void
756c5857 8644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8645_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
756c5857 8646{
79fb4764 8647 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
756c5857
AI
8648}
8649
79fb4764 8650extern __inline __m128i
756c5857 8651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8652_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
756c5857 8653{
79fb4764
HJ
8654 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
8655 (__v8hi) __O, __M);
756c5857
AI
8656}
8657
79fb4764 8658extern __inline __m128i
756c5857 8659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8660_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
756c5857 8661{
79fb4764
HJ
8662 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
8663 (__v8hi)
8664 _mm_setzero_si128 (),
8665 __M);
756c5857
AI
8666}
8667
79fb4764 8668extern __inline __m128i
756c5857 8669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8670_mm512_cvtsepi64_epi16 (__m512i __A)
756c5857 8671{
79fb4764
HJ
8672 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
8673 (__v8hi)
8674 _mm_undefined_si128 (),
8675 (__mmask8) -1);
756c5857
AI
8676}
8677
79fb4764 8678extern __inline void
756c5857 8679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8680_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
756c5857 8681{
79fb4764 8682 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
756c5857 8683}
756c5857 8684
79fb4764 8685extern __inline __m128i
756c5857 8686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8687_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
756c5857 8688{
79fb4764
HJ
8689 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
8690 (__v8hi) __O, __M);
756c5857
AI
8691}
8692
79fb4764 8693extern __inline __m128i
756c5857 8694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8695_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
756c5857 8696{
79fb4764
HJ
8697 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
8698 (__v8hi)
8699 _mm_setzero_si128 (),
8700 __M);
756c5857
AI
8701}
8702
79fb4764 8703extern __inline __m128i
756c5857 8704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8705_mm512_cvtusepi64_epi16 (__m512i __A)
756c5857 8706{
79fb4764
HJ
8707 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
8708 (__v8hi)
8709 _mm_undefined_si128 (),
8710 (__mmask8) -1);
756c5857
AI
8711}
8712
79fb4764 8713extern __inline void
756c5857 8714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8715_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
756c5857 8716{
79fb4764 8717 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
756c5857
AI
8718}
8719
79fb4764 8720extern __inline __m128i
756c5857 8721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8722_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
756c5857 8723{
79fb4764
HJ
8724 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
8725 (__v8hi) __O, __M);
756c5857
AI
8726}
8727
79fb4764 8728extern __inline __m128i
756c5857 8729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8730_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
756c5857 8731{
79fb4764
HJ
8732 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
8733 (__v8hi)
8734 _mm_setzero_si128 (),
8735 __M);
756c5857 8736}
756c5857 8737
79fb4764 8738extern __inline __m128i
756c5857 8739__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8740_mm512_cvtepi64_epi8 (__m512i __A)
756c5857 8741{
79fb4764
HJ
8742 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
8743 (__v16qi)
8744 _mm_undefined_si128 (),
8745 (__mmask8) -1);
756c5857
AI
8746}
8747
79fb4764 8748extern __inline void
756c5857 8749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8750_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
756c5857 8751{
79fb4764
HJ
8752 __builtin_ia32_pmovqb512mem_mask ((unsigned long long *) __P,
8753 (__v8di) __A, __M);
756c5857
AI
8754}
8755
79fb4764 8756extern __inline __m128i
756c5857 8757__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8758_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
756c5857 8759{
79fb4764
HJ
8760 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
8761 (__v16qi) __O, __M);
756c5857
AI
8762}
8763
79fb4764 8764extern __inline __m128i
756c5857 8765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8766_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
756c5857 8767{
79fb4764
HJ
8768 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
8769 (__v16qi)
8770 _mm_setzero_si128 (),
8771 __M);
756c5857
AI
8772}
8773
79fb4764 8774extern __inline __m128i
756c5857 8775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8776_mm512_cvtsepi64_epi8 (__m512i __A)
756c5857 8777{
79fb4764
HJ
8778 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
8779 (__v16qi)
8780 _mm_undefined_si128 (),
8781 (__mmask8) -1);
756c5857
AI
8782}
8783
79fb4764 8784extern __inline void
756c5857 8785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8786_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
756c5857 8787{
79fb4764 8788 __builtin_ia32_pmovsqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
756c5857 8789}
756c5857 8790
79fb4764 8791extern __inline __m128i
756c5857 8792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8793_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
756c5857 8794{
79fb4764
HJ
8795 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
8796 (__v16qi) __O, __M);
756c5857
AI
8797}
8798
79fb4764 8799extern __inline __m128i
756c5857 8800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8801_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
756c5857 8802{
79fb4764
HJ
8803 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
8804 (__v16qi)
8805 _mm_setzero_si128 (),
8806 __M);
756c5857
AI
8807}
8808
79fb4764 8809extern __inline __m128i
756c5857 8810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8811_mm512_cvtusepi64_epi8 (__m512i __A)
756c5857 8812{
79fb4764
HJ
8813 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
8814 (__v16qi)
8815 _mm_undefined_si128 (),
8816 (__mmask8) -1);
756c5857
AI
8817}
8818
79fb4764 8819extern __inline void
756c5857 8820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8821_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
756c5857 8822{
79fb4764 8823 __builtin_ia32_pmovusqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
756c5857
AI
8824}
8825
79fb4764 8826extern __inline __m128i
756c5857 8827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8828_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
756c5857 8829{
79fb4764
HJ
8830 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
8831 (__v16qi) __O,
8832 __M);
756c5857
AI
8833}
8834
79fb4764 8835extern __inline __m128i
756c5857 8836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8837_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
756c5857 8838{
79fb4764
HJ
8839 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
8840 (__v16qi)
8841 _mm_setzero_si128 (),
8842 __M);
756c5857
AI
8843}
8844
8845extern __inline __m512d
8846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8847_mm512_cvtepi32_pd (__m256i __A)
756c5857 8848{
79fb4764 8849 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
756c5857 8850 (__v8df)
0b192937 8851 _mm512_undefined_pd (),
756c5857
AI
8852 (__mmask8) -1);
8853}
8854
8855extern __inline __m512d
8856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8857_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
756c5857 8858{
79fb4764 8859 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
756c5857
AI
8860 (__v8df) __W,
8861 (__mmask8) __U);
8862}
8863
8864extern __inline __m512d
8865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8866_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
756c5857 8867{
79fb4764 8868 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
756c5857
AI
8869 (__v8df)
8870 _mm512_setzero_pd (),
8871 (__mmask8) __U);
8872}
8873
79fb4764 8874extern __inline __m512d
756c5857 8875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8876_mm512_cvtepu32_pd (__m256i __A)
756c5857 8877{
79fb4764
HJ
8878 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
8879 (__v8df)
8880 _mm512_undefined_pd (),
8881 (__mmask8) -1);
756c5857
AI
8882}
8883
79fb4764 8884extern __inline __m512d
756c5857 8885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8886_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
756c5857 8887{
79fb4764
HJ
8888 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
8889 (__v8df) __W,
8890 (__mmask8) __U);
756c5857
AI
8891}
8892
79fb4764 8893extern __inline __m512d
756c5857 8894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8895_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
756c5857 8896{
79fb4764
HJ
8897 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
8898 (__v8df)
8899 _mm512_setzero_pd (),
8900 (__mmask8) __U);
756c5857
AI
8901}
8902
8903#ifdef __OPTIMIZE__
79fb4764 8904extern __inline __m512
756c5857 8905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8906_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
756c5857 8907{
79fb4764
HJ
8908 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
8909 (__v16sf)
8910 _mm512_undefined_ps (),
8911 (__mmask16) -1, __R);
756c5857
AI
8912}
8913
79fb4764 8914extern __inline __m512
756c5857 8915__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8916_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
8917 const int __R)
756c5857 8918{
79fb4764
HJ
8919 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
8920 (__v16sf) __W,
8921 (__mmask16) __U, __R);
756c5857
AI
8922}
8923
79fb4764 8924extern __inline __m512
756c5857 8925__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8926_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
756c5857 8927{
79fb4764
HJ
8928 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
8929 (__v16sf)
8930 _mm512_setzero_ps (),
8931 (__mmask16) __U, __R);
756c5857
AI
8932}
8933
8934extern __inline __m512
8935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8936_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
756c5857 8937{
79fb4764 8938 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
756c5857 8939 (__v16sf)
0b192937 8940 _mm512_undefined_ps (),
756c5857
AI
8941 (__mmask16) -1, __R);
8942}
8943
8944extern __inline __m512
8945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8946_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
8947 const int __R)
756c5857 8948{
79fb4764 8949 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
756c5857
AI
8950 (__v16sf) __W,
8951 (__mmask16) __U, __R);
8952}
8953
8954extern __inline __m512
8955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8956_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
756c5857 8957{
79fb4764 8958 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
756c5857
AI
8959 (__v16sf)
8960 _mm512_setzero_ps (),
8961 (__mmask16) __U, __R);
8962}
8963
79fb4764
HJ
8964#else
8965#define _mm512_cvt_roundepi32_ps(A, B) \
8966 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
8967
8968#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
8969 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)
8970
8971#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
8972 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
8973
8974#define _mm512_cvt_roundepu32_ps(A, B) \
8975 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
8976
8977#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
8978 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)
8979
8980#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
8981 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
8982#endif
8983
8984#ifdef __OPTIMIZE__
8985extern __inline __m256d
756c5857 8986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 8987_mm512_extractf64x4_pd (__m512d __A, const int __imm)
756c5857 8988{
79fb4764
HJ
8989 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
8990 __imm,
8991 (__v4df)
8992 _mm256_undefined_pd (),
8993 (__mmask8) -1);
756c5857
AI
8994}
8995
79fb4764 8996extern __inline __m256d
756c5857 8997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
8998_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
8999 const int __imm)
756c5857 9000{
79fb4764
HJ
9001 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
9002 __imm,
9003 (__v4df) __W,
9004 (__mmask8) __U);
756c5857
AI
9005}
9006
79fb4764 9007extern __inline __m256d
756c5857 9008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9009_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
756c5857 9010{
79fb4764
HJ
9011 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
9012 __imm,
9013 (__v4df)
9014 _mm256_setzero_pd (),
9015 (__mmask8) __U);
9016}
9017
9018extern __inline __m128
9019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9020_mm512_extractf32x4_ps (__m512 __A, const int __imm)
9021{
9022 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
9023 __imm,
9024 (__v4sf)
9025 _mm_undefined_ps (),
9026 (__mmask8) -1);
9027}
9028
9029extern __inline __m128
9030__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9031_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
9032 const int __imm)
9033{
9034 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
9035 __imm,
9036 (__v4sf) __W,
9037 (__mmask8) __U);
9038}
9039
9040extern __inline __m128
9041__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9042_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
9043{
9044 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
9045 __imm,
9046 (__v4sf)
9047 _mm_setzero_ps (),
9048 (__mmask8) __U);
756c5857
AI
9049}
9050
9051extern __inline __m256i
9052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9053_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
756c5857 9054{
79fb4764
HJ
9055 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
9056 __imm,
9057 (__v4di)
9058 _mm256_undefined_si256 (),
9059 (__mmask8) -1);
756c5857
AI
9060}
9061
9062extern __inline __m256i
9063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9064_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
9065 const int __imm)
756c5857 9066{
79fb4764
HJ
9067 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
9068 __imm,
9069 (__v4di) __W,
9070 (__mmask8) __U);
756c5857
AI
9071}
9072
9073extern __inline __m256i
9074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9075_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
756c5857 9076{
79fb4764
HJ
9077 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
9078 __imm,
9079 (__v4di)
756c5857 9080 _mm256_setzero_si256 (),
79fb4764
HJ
9081 (__mmask8) __U);
9082}
9083
9084extern __inline __m128i
9085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9086_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
9087{
9088 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
9089 __imm,
9090 (__v4si)
9091 _mm_undefined_si128 (),
9092 (__mmask8) -1);
9093}
9094
9095extern __inline __m128i
9096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9097_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
9098 const int __imm)
9099{
9100 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
9101 __imm,
9102 (__v4si) __W,
9103 (__mmask8) __U);
9104}
9105
9106extern __inline __m128i
9107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
9109{
9110 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
9111 __imm,
9112 (__v4si)
9113 _mm_setzero_si128 (),
9114 (__mmask8) __U);
756c5857
AI
9115}
9116#else
756c5857 9117
79fb4764
HJ
9118#define _mm512_extractf64x4_pd(X, C) \
9119 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
9120 (int) (C),\
9121 (__v4df)(__m256d)_mm256_undefined_pd(),\
9122 (__mmask8)-1))
756c5857 9123
79fb4764
HJ
9124#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
9125 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
9126 (int) (C),\
9127 (__v4df)(__m256d)(W),\
9128 (__mmask8)(U)))
756c5857 9129
79fb4764
HJ
9130#define _mm512_maskz_extractf64x4_pd(U, X, C) \
9131 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
9132 (int) (C),\
9133 (__v4df)(__m256d)_mm256_setzero_pd(),\
9134 (__mmask8)(U)))
756c5857 9135
79fb4764
HJ
9136#define _mm512_extractf32x4_ps(X, C) \
9137 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
9138 (int) (C),\
9139 (__v4sf)(__m128)_mm_undefined_ps(),\
9140 (__mmask8)-1))
756c5857 9141
79fb4764
HJ
9142#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
9143 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
9144 (int) (C),\
9145 (__v4sf)(__m128)(W),\
9146 (__mmask8)(U)))
756c5857 9147
79fb4764
HJ
9148#define _mm512_maskz_extractf32x4_ps(U, X, C) \
9149 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
9150 (int) (C),\
9151 (__v4sf)(__m128)_mm_setzero_ps(),\
9152 (__mmask8)(U)))
9153
9154#define _mm512_extracti64x4_epi64(X, C) \
9155 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
9156 (int) (C),\
9157 (__v4di)(__m256i)_mm256_undefined_si256 (),\
9158 (__mmask8)-1))
9159
9160#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
9161 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
9162 (int) (C),\
9163 (__v4di)(__m256i)(W),\
9164 (__mmask8)(U)))
9165
9166#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
9167 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
9168 (int) (C),\
9169 (__v4di)(__m256i)_mm256_setzero_si256 (),\
9170 (__mmask8)(U)))
9171
9172#define _mm512_extracti32x4_epi32(X, C) \
9173 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
9174 (int) (C),\
9175 (__v4si)(__m128i)_mm_undefined_si128 (),\
9176 (__mmask8)-1))
9177
9178#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
9179 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
9180 (int) (C),\
9181 (__v4si)(__m128i)(W),\
9182 (__mmask8)(U)))
9183
9184#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
9185 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
9186 (int) (C),\
9187 (__v4si)(__m128i)_mm_setzero_si128 (),\
9188 (__mmask8)(U)))
756c5857
AI
9189#endif
9190
9191#ifdef __OPTIMIZE__
79fb4764 9192extern __inline __m512i
756c5857 9193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9194_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
756c5857 9195{
79fb4764
HJ
9196 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
9197 (__v4si) __B,
9198 __imm,
9199 (__v16si) __A, -1);
756c5857
AI
9200}
9201
79fb4764 9202extern __inline __m512
756c5857 9203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9204_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
756c5857 9205{
79fb4764
HJ
9206 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
9207 (__v4sf) __B,
9208 __imm,
9209 (__v16sf) __A, -1);
756c5857
AI
9210}
9211
79fb4764 9212extern __inline __m512i
075691af 9213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9214_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
075691af 9215{
79fb4764
HJ
9216 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
9217 (__v4di) __B,
9218 __imm,
9219 (__v8di)
9220 _mm512_undefined_epi32 (),
9221 (__mmask8) -1);
075691af
AI
9222}
9223
79fb4764 9224extern __inline __m512i
93103603 9225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9226_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
9227 __m256i __B, const int __imm)
93103603 9228{
79fb4764
HJ
9229 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
9230 (__v4di) __B,
9231 __imm,
9232 (__v8di) __W,
9233 (__mmask8) __U);
93103603
SP
9234}
9235
79fb4764 9236extern __inline __m512i
93103603 9237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9238_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
9239 const int __imm)
93103603 9240{
79fb4764
HJ
9241 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
9242 (__v4di) __B,
9243 __imm,
9244 (__v8di)
9245 _mm512_setzero_si512 (),
9246 (__mmask8) __U);
93103603
SP
9247}
9248
79fb4764 9249extern __inline __m512d
075691af 9250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9251_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
075691af 9252{
79fb4764
HJ
9253 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
9254 (__v4df) __B,
9255 __imm,
9256 (__v8df)
9257 _mm512_undefined_pd (),
9258 (__mmask8) -1);
075691af 9259}
93103603 9260
79fb4764 9261extern __inline __m512d
93103603 9262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9263_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
9264 __m256d __B, const int __imm)
93103603 9265{
79fb4764
HJ
9266 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
9267 (__v4df) __B,
9268 __imm,
9269 (__v8df) __W,
9270 (__mmask8) __U);
93103603
SP
9271}
9272
79fb4764 9273extern __inline __m512d
93103603 9274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9275_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
9276 const int __imm)
93103603 9277{
79fb4764
HJ
9278 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
9279 (__v4df) __B,
9280 __imm,
9281 (__v8df)
9282 _mm512_setzero_pd (),
9283 (__mmask8) __U);
93103603 9284}
756c5857 9285#else
79fb4764
HJ
/* Macro form (in the #else arm of a conditional whose opening is not
   visible here): expands to __builtin_ia32_insertf32x4_mask with an
   all-ones (__mmask16)(-1) mask.
   NOTE(review): X is expanded twice (first and fourth operands), so an
   argument with side effects would be evaluated twice.  */
9286#define _mm512_insertf32x4(X, Y, C) \
9287 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
9288 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
075691af 9289
79fb4764
HJ
/* Macro form: expands to __builtin_ia32_inserti32x4_mask with an
   all-ones (__mmask16)(-1) mask.
   NOTE(review): X is expanded twice (first and fourth operands), so an
   argument with side effects would be evaluated twice.  */
9290#define _mm512_inserti32x4(X, Y, C) \
9291 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
9292 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
075691af 9293
79fb4764
HJ
/* Macro form of the unmasked insert: same operands as the inline
   function of this name -- X, Y, (int) C, _mm512_undefined_pd () and an
   all-ones (__mmask8)-1 mask.  */
9294#define _mm512_insertf64x4(X, Y, C) \
9295 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
9296 (__v4df)(__m256d) (Y), (int) (C), \
9297 (__v8df)(__m512d)_mm512_undefined_pd(), \
9298 (__mmask8)-1))
93103603 9299
79fb4764
HJ
/* Macro form of the masked insert: W is passed as the fourth operand of
   __builtin_ia32_insertf64x4_mask and U as its mask.  */
9300#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
9301 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
9302 (__v4df)(__m256d) (Y), (int) (C), \
9303 (__v8df)(__m512d)(W), \
9304 (__mmask8)(U)))
93103603 9305
79fb4764
HJ
/* Macro form of the zero-masked insert: the result of
   _mm512_setzero_pd () is the fourth operand and U is the mask.  */
9306#define _mm512_maskz_insertf64x4(U, X, Y, C) \
9307 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
9308 (__v4df)(__m256d) (Y), (int) (C), \
9309 (__v8df)(__m512d)_mm512_setzero_pd(), \
9310 (__mmask8)(U)))
93103603 9311
79fb4764
HJ
/* Macro form of the unmasked integer insert: expands to
   __builtin_ia32_inserti64x4_mask with _mm512_undefined_epi32 () as the
   fourth operand and an all-ones (__mmask8)-1 mask.  */
9312#define _mm512_inserti64x4(X, Y, C) \
9313 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
9314 (__v4di)(__m256i) (Y), (int) (C), \
9315 (__v8di)(__m512i)_mm512_undefined_epi32 (), \
9316 (__mmask8)-1))
93103603 9317
79fb4764
HJ
/* Macro form of the masked integer insert: W is passed as the fourth
   operand of __builtin_ia32_inserti64x4_mask and U as its mask.  */
9318#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
9319 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
9320 (__v4di)(__m256i) (Y), (int) (C),\
9321 (__v8di)(__m512i)(W),\
9322 (__mmask8)(U)))
93103603 9323
79fb4764
HJ
/* Macro form of the zero-masked integer insert: the result of
   _mm512_setzero_si512 () is the fourth operand and U is the mask.  */
9324#define _mm512_maskz_inserti64x4(U, X, Y, C) \
9325 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
9326 (__v4di)(__m256i) (Y), (int) (C), \
9327 (__v8di)(__m512i)_mm512_setzero_si512 (), \
9328 (__mmask8)(U)))
756c5857
AI
9329#endif
9330
79fb4764
HJ
/* Returns the value read through __P after casting it to __m512d_u *.
   No builtin is involved; this is a plain dereference.
   NOTE(review): the cast drops the const qualifier of __P; __m512d_u is
   presumably the unaligned-access variant of __m512d -- confirm against
   its typedef.  */
9331extern __inline __m512d
9332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9333_mm512_loadu_pd (void const *__P)
9334{
9335 return *(__m512d_u *)__P;
9336}
93103603 9337
79fb4764
HJ
/* Masked load: passes __P (as const double *), __W and the mask __U to
   __builtin_ia32_loadupd512_mask.
   NOTE(review): presumably __W supplies elements whose __U bit is clear
   -- builtin semantics not visible here.  */
9338extern __inline __m512d
9339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9340_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9341{
9342 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
9343 (__v8df) __W,
9344 (__mmask8) __U);
9345}
93103603 9346
79fb4764
HJ
/* Zero-masked load: passes __P (as const double *), the result of
   _mm512_setzero_pd () and the mask __U to
   __builtin_ia32_loadupd512_mask.  */
9347extern __inline __m512d
9348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9349_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
9350{
9351 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
9352 (__v8df)
9353 _mm512_setzero_pd (),
9354 (__mmask8) __U);
9355}
93103603 9356
756c5857
AI
/* Stores __A through __P viewed as a __m512d_u pointer; a plain
   assignment, no builtin involved.  */
9357extern __inline void
9358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9359_mm512_storeu_pd (void *__P, __m512d __A)
756c5857 9360{
79fb4764 9361 *(__m512d_u *)__P = __A;
756c5857
AI
9362}
9363
/* Masked store: passes __P (as double *), __A and the mask __U to
   __builtin_ia32_storeupd512_mask.  Returns nothing.  */
9364extern __inline void
9365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9366_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
756c5857 9367{
79fb4764
HJ
9368 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
9369 (__mmask8) __U);
756c5857
AI
9370}
9371
/* Returns the value read through __P after casting it to __m512_u *.
   NOTE(review): the cast drops the const qualifier of __P.  */
79fb4764 9372extern __inline __m512
756c5857 9373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9374_mm512_loadu_ps (void const *__P)
756c5857 9375{
79fb4764 9376 return *(__m512_u *)__P;
756c5857
AI
9377}
9378
/* Masked load: passes __P (as const float *), __W and the mask __U to
   __builtin_ia32_loadups512_mask.  */
79fb4764 9379extern __inline __m512
c56a42b9 9380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9381_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
c56a42b9 9382{
79fb4764
HJ
9383 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
9384 (__v16sf) __W,
9385 (__mmask16) __U);
c56a42b9
KY
9386}
9387
79fb4764
HJ
/* Zero-masked load: passes __P (as const float *), the result of
   _mm512_setzero_ps () and the mask __U to
   __builtin_ia32_loadups512_mask.  */
9388extern __inline __m512
9389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9390_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
9c3c2608 9391{
79fb4764
HJ
9392 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
9393 (__v16sf)
9394 _mm512_setzero_ps (),
9395 (__mmask16) __U);
9396}
9c3c2608 9397
79fb4764
HJ
/* Stores __A through __P viewed as a __m512_u pointer; a plain
   assignment, no builtin involved.  */
9398extern __inline void
9399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9400_mm512_storeu_ps (void *__P, __m512 __A)
9c3c2608 9401{
79fb4764
HJ
9402 *(__m512_u *)__P = __A;
9403}
9c3c2608 9404
/* Masked store: passes __P (as float *), __A and the mask __U to
   __builtin_ia32_storeups512_mask.  Returns nothing.  */
79fb4764 9405extern __inline void
075691af 9406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9407_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
075691af 9408{
79fb4764
HJ
9409 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
9410 (__mmask16) __U);
075691af
AI
9411}
9412
/* Returns the value read through __P after casting it to __m512i_u *.
   The body is the same expression as _mm512_loadu_si512 and
   _mm512_loadu_epi32 in this file.
   NOTE(review): the cast drops the const qualifier of __P.  */
79fb4764 9413extern __inline __m512i
68d872d7 9414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9415_mm512_loadu_epi64 (void const *__P)
68d872d7 9416{
79fb4764 9417 return *(__m512i_u *) __P;
68d872d7
SP
9418}
9419
/* Masked load: passes __P (as const long long *), __W and the mask __U
   to __builtin_ia32_loaddqudi512_mask.  */
79fb4764 9420extern __inline __m512i
68d872d7 9421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9422_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
68d872d7 9423{
79fb4764
HJ
9424 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
9425 (__v8di) __W,
9426 (__mmask8) __U);
68d872d7
SP
9427}
9428
/* Zero-masked load: passes __P (as const long long *), the result of
   _mm512_setzero_si512 () and the mask __U to
   __builtin_ia32_loaddqudi512_mask.  */
79fb4764 9429extern __inline __m512i
075691af 9430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9431_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
075691af 9432{
79fb4764
HJ
9433 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
9434 (__v8di)
9435 _mm512_setzero_si512 (),
9436 (__mmask8) __U);
075691af
AI
9437}
9438
/* Stores __A, explicitly cast to __m512i_u, through __P viewed as a
   __m512i_u pointer; a plain assignment, no builtin involved.  */
79fb4764 9439extern __inline void
68d872d7 9440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9441_mm512_storeu_epi64 (void *__P, __m512i __A)
68d872d7 9442{
79fb4764 9443 *(__m512i_u *) __P = (__m512i_u) __A;
68d872d7
SP
9444}
9445
/* Masked store: passes __P (as long long *), __A and the mask __U to
   __builtin_ia32_storedqudi512_mask.  Returns nothing.  */
79fb4764 9446extern __inline void
68d872d7 9447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9448_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
68d872d7 9449{
79fb4764
HJ
9450 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
9451 (__mmask8) __U);
68d872d7
SP
9452}
9453
/* Returns the value read through __P after casting it to __m512i_u *.
   NOTE(review): the cast drops the const qualifier of __P.  */
79fb4764 9454extern __inline __m512i
756c5857 9455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9456_mm512_loadu_si512 (void const *__P)
756c5857 9457{
79fb4764 9458 return *(__m512i_u *)__P;
756c5857
AI
9459}
9460
/* Returns the value read through __P after casting it to __m512i_u *;
   the body is the same expression as _mm512_loadu_si512.
   NOTE(review): the cast drops the const qualifier of __P.  */
79fb4764 9461extern __inline __m512i
756c5857 9462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9463_mm512_loadu_epi32 (void const *__P)
756c5857 9464{
79fb4764 9465 return *(__m512i_u *) __P;
756c5857
AI
9466}
9467
/* Masked load: passes __P (as const int *), __W and the mask __U to
   __builtin_ia32_loaddqusi512_mask.  */
79fb4764 9468extern __inline __m512i
756c5857 9469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9470_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
756c5857 9471{
79fb4764
HJ
9472 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
9473 (__v16si) __W,
9474 (__mmask16) __U);
756c5857
AI
9475}
9476
/* Zero-masked load: passes __P (as const int *), the result of
   _mm512_setzero_si512 () and the mask __U to
   __builtin_ia32_loaddqusi512_mask.  */
79fb4764 9477extern __inline __m512i
756c5857 9478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9479_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
756c5857 9480{
79fb4764
HJ
9481 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
9482 (__v16si)
9483 _mm512_setzero_si512 (),
9484 (__mmask16) __U);
756c5857
AI
9485}
9486
/* Stores __A through __P viewed as a __m512i_u pointer; a plain
   assignment, no builtin involved.  */
79fb4764 9487extern __inline void
756c5857 9488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9489_mm512_storeu_si512 (void *__P, __m512i __A)
756c5857 9490{
79fb4764 9491 *(__m512i_u *)__P = __A;
756c5857
AI
9492}
9493
/* Stores __A, explicitly cast to __m512i_u, through __P viewed as a
   __m512i_u pointer; a plain assignment, no builtin involved.  */
79fb4764 9494extern __inline void
756c5857 9495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9496_mm512_storeu_epi32 (void *__P, __m512i __A)
756c5857 9497{
79fb4764 9498 *(__m512i_u *) __P = (__m512i_u) __A;
756c5857
AI
9499}
9500
/* Masked store: passes __P (as int *), __A and the mask __U to
   __builtin_ia32_storedqusi512_mask.  Returns nothing.  */
79fb4764 9501extern __inline void
756c5857 9502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9503_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
756c5857 9504{
79fb4764
HJ
9505 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
9506 (__mmask16) __U);
756c5857
AI
9507}
9508
/* Unmasked form: forwards __A and the control vector __C to
   __builtin_ia32_vpermilvarpd512_mask with _mm512_undefined_pd () as
   the third operand and an all-ones (__mmask8) -1 mask.  */
9509extern __inline __m512d
9510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9511_mm512_permutevar_pd (__m512d __A, __m512i __C)
756c5857 9512{
79fb4764
HJ
9513 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
9514 (__v8di) __C,
9515 (__v8df)
9516 _mm512_undefined_pd (),
9517 (__mmask8) -1);
756c5857
AI
9518}
9519
/* Masked form: forwards __A and __C to
   __builtin_ia32_vpermilvarpd512_mask with __W as the third operand and
   __U as the mask.
   NOTE(review): presumably __W supplies elements whose __U bit is clear
   -- builtin semantics not visible here.  */
9520extern __inline __m512d
9521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9522_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
756c5857 9523{
79fb4764
HJ
9524 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
9525 (__v8di) __C,
9526 (__v8df) __W,
9527 (__mmask8) __U);
756c5857
AI
9528}
9529
/* Zero-masked form: forwards __A and __C to
   __builtin_ia32_vpermilvarpd512_mask with the result of
   _mm512_setzero_pd () as the third operand and __U as the mask.  */
79fb4764 9530extern __inline __m512d
756c5857 9531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9532_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
756c5857 9533{
79fb4764
HJ
9534 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
9535 (__v8di) __C,
9536 (__v8df)
9537 _mm512_setzero_pd (),
9538 (__mmask8) __U);
756c5857
AI
9539}
9540
/* Unmasked form: forwards __A and the control vector __C to
   __builtin_ia32_vpermilvarps512_mask with _mm512_undefined_ps () as
   the third operand and an all-ones (__mmask16) -1 mask.  */
9541extern __inline __m512
9542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9543_mm512_permutevar_ps (__m512 __A, __m512i __C)
756c5857 9544{
79fb4764
HJ
9545 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
9546 (__v16si) __C,
9547 (__v16sf)
9548 _mm512_undefined_ps (),
9549 (__mmask16) -1);
756c5857
AI
9550}
9551
/* Masked form: forwards __A and __C to
   __builtin_ia32_vpermilvarps512_mask with __W as the third operand and
   __U as the mask.  */
9552extern __inline __m512
9553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9554_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
756c5857 9555{
79fb4764
HJ
9556 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
9557 (__v16si) __C,
9558 (__v16sf) __W,
9559 (__mmask16) __U);
756c5857
AI
9560}
9561
/* Zero-masked form: forwards __A and __C to
   __builtin_ia32_vpermilvarps512_mask with the result of
   _mm512_setzero_ps () as the third operand and __U as the mask.  */
79fb4764 9562extern __inline __m512
075691af 9563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9564_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
075691af 9565{
79fb4764
HJ
9566 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
9567 (__v16si) __C,
9568 (__v16sf)
9569 _mm512_setzero_ps (),
9570 (__mmask16) __U);
075691af
AI
9571}
9572
/* Unmasked form: calls __builtin_ia32_vpermt2varq512_mask with an
   all-ones (__mmask8) -1 mask.  Note the operand order: the index
   vector __I is passed first, followed by __A and __B, which differs
   from this function's own parameter order.  */
79fb4764 9573extern __inline __m512i
68d872d7 9574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9575_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
68d872d7 9576{
79fb4764
HJ
9577 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
9578 /* idx */ ,
9579 (__v8di) __A,
9580 (__v8di) __B,
9581 (__mmask8) -1);
68d872d7
SP
9582}
9583
/* Masked form: calls __builtin_ia32_vpermt2varq512_mask with the index
   vector __I first, then __A, __B and the mask __U.
   NOTE(review): no separate pass-through operand is supplied;
   presumably elements whose __U bit is clear keep the value of __A --
   confirm against the builtin.  */
79fb4764 9584extern __inline __m512i
68d872d7 9585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9586_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
9587 __m512i __B)
68d872d7 9588{
79fb4764
HJ
9589 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
9590 /* idx */ ,
9591 (__v8di) __A,
9592 (__v8di) __B,
9593 (__mmask8) __U);
68d872d7
SP
9594}
9595
/* "mask2" form: unlike the other permutex2var_epi64 wrappers this one
   calls the vpermi2 builtin, __builtin_ia32_vpermi2varq512_mask, and
   passes __A first with the index vector __I second, then __B and the
   mask __U.
   NOTE(review): presumably elements whose __U bit is clear keep the
   value of __I -- confirm against the builtin.  */
79fb4764 9596extern __inline __m512i
075691af 9597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9598_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
9599 __mmask8 __U, __m512i __B)
075691af 9600{
79fb4764
HJ
9601 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
9602 (__v8di) __I
9603 /* idx */ ,
9604 (__v8di) __B,
9605 (__mmask8) __U);
075691af
AI
9606}
9607
/* Zero-masked form: calls the _maskz builtin,
   __builtin_ia32_vpermt2varq512_maskz, with the index vector __I first,
   then __A, __B and the mask __U.  */
79fb4764 9608extern __inline __m512i
68d872d7 9609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9610_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
9611 __m512i __I, __m512i __B)
68d872d7 9612{
79fb4764
HJ
9613 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
9614 /* idx */ ,
9615 (__v8di) __A,
9616 (__v8di) __B,
9617 (__mmask8) __U);
68d872d7
SP
9618}
9619
/* Unmasked form: calls __builtin_ia32_vpermt2vard512_mask with an
   all-ones (__mmask16) -1 mask.  The index vector __I is passed first,
   followed by __A and __B.  */
79fb4764 9620extern __inline __m512i
68d872d7 9621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9622_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
68d872d7 9623{
79fb4764
HJ
9624 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
9625 /* idx */ ,
9626 (__v16si) __A,
9627 (__v16si) __B,
9628 (__mmask16) -1);
9629}
68d872d7 9630
79fb4764
HJ
/* Masked form: calls __builtin_ia32_vpermt2vard512_mask with the index
   vector __I first, then __A, __B and the mask __U.  */
9631extern __inline __m512i
9632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9633_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
9634 __m512i __I, __m512i __B)
9635{
9636 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
9637 /* idx */ ,
9638 (__v16si) __A,
9639 (__v16si) __B,
9640 (__mmask16) __U);
9641}
68d872d7 9642
79fb4764
HJ
/* "mask2" form: calls the vpermi2 builtin,
   __builtin_ia32_vpermi2vard512_mask, passing __A first and the index
   vector __I second, then __B and the mask __U.  */
9643extern __inline __m512i
9644__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9645_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
9646 __mmask16 __U, __m512i __B)
9647{
9648 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
9649 (__v16si) __I
9650 /* idx */ ,
9651 (__v16si) __B,
9652 (__mmask16) __U);
9653}
756c5857 9654
79fb4764
HJ
/* Zero-masked form: calls the _maskz builtin,
   __builtin_ia32_vpermt2vard512_maskz, with the index vector __I first,
   then __A, __B and the mask __U.  */
9655extern __inline __m512i
9656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9657_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
9658 __m512i __I, __m512i __B)
9659{
9660 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
9661 /* idx */ ,
9662 (__v16si) __A,
9663 (__v16si) __B,
9664 (__mmask16) __U);
9665}
756c5857 9666
79fb4764
HJ
/* Unmasked form: calls __builtin_ia32_vpermt2varpd512_mask with an
   all-ones (__mmask8) -1 mask.  The integer index vector __I is passed
   first (as __v8di), followed by __A and __B (as __v8df).  */
9667extern __inline __m512d
9668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9669_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
9670{
9671 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
9672 /* idx */ ,
9673 (__v8df) __A,
9674 (__v8df) __B,
9675 (__mmask8) -1);
9676}
756c5857 9677
79fb4764
HJ
/* Masked form: calls __builtin_ia32_vpermt2varpd512_mask with the index
   vector __I first, then __A, __B and the mask __U.  */
9678extern __inline __m512d
9679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9680_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
9681 __m512d __B)
9682{
9683 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
9684 /* idx */ ,
9685 (__v8df) __A,
9686 (__v8df) __B,
9687 (__mmask8) __U);
9688}
756c5857 9689
79fb4764
HJ
/* "mask2" form: calls the vpermi2 builtin,
   __builtin_ia32_vpermi2varpd512_mask, passing __A first and the index
   vector __I second, then __B and the mask __U.  */
9690extern __inline __m512d
9691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9692_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
9693 __m512d __B)
9694{
9695 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
9696 (__v8di) __I
9697 /* idx */ ,
9698 (__v8df) __B,
9699 (__mmask8) __U);
9700}
756c5857 9701
79fb4764
HJ
/* Zero-masked form: calls the _maskz builtin,
   __builtin_ia32_vpermt2varpd512_maskz, with the index vector __I
   first, then __A, __B and the mask __U.  */
9702extern __inline __m512d
9703__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9704_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
9705 __m512d __B)
9706{
9707 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
9708 /* idx */ ,
9709 (__v8df) __A,
9710 (__v8df) __B,
9711 (__mmask8) __U);
9712}
756c5857 9713
756c5857
AI
/* Unmasked form: calls __builtin_ia32_vpermt2varps512_mask with an
   all-ones (__mmask16) -1 mask.  The integer index vector __I is passed
   first (as __v16si), followed by __A and __B (as __v16sf).  */
9714extern __inline __m512
9715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9716_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
756c5857 9717{
79fb4764
HJ
9718 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
9719 /* idx */ ,
9720 (__v16sf) __A,
9721 (__v16sf) __B,
9722 (__mmask16) -1);
756c5857
AI
9723}
9724
/* Masked form: calls __builtin_ia32_vpermt2varps512_mask with the index
   vector __I first, then __A, __B and the mask __U.  */
9725extern __inline __m512
9726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9727_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
756c5857 9728{
79fb4764
HJ
9729 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
9730 /* idx */ ,
9731 (__v16sf) __A,
9732 (__v16sf) __B,
9733 (__mmask16) __U);
756c5857
AI
9734}
9735
/* "mask2" form: calls the vpermi2 builtin,
   __builtin_ia32_vpermi2varps512_mask, passing __A first and the index
   vector __I second, then __B and the mask __U.  */
9736extern __inline __m512
9737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9738_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
9739 __m512 __B)
756c5857 9740{
79fb4764
HJ
9741 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
9742 (__v16si) __I
9743 /* idx */ ,
9744 (__v16sf) __B,
9745 (__mmask16) __U);
756c5857
AI
9746}
9747
/* Zero-masked form: calls the _maskz builtin,
   __builtin_ia32_vpermt2varps512_maskz, with the index vector __I
   first, then __A, __B and the mask __U.  */
79fb4764 9748extern __inline __m512
756c5857 9749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9750_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
9751 __m512 __B)
756c5857 9752{
79fb4764
HJ
9753 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
9754 /* idx */ ,
9755 (__v16sf) __A,
9756 (__v16sf) __B,
9757 (__mmask16) __U);
756c5857
AI
9758}
9759
79fb4764 9760#ifdef __OPTIMIZE__
756c5857
AI
/* Inline-function form, compiled only when __OPTIMIZE__ is defined.
   Forwards __X and the immediate __C to
   __builtin_ia32_vpermilpd512_mask with _mm512_undefined_pd () as the
   third operand and an all-ones (__mmask8) -1 mask.  */
9761extern __inline __m512d
9762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9763_mm512_permute_pd (__m512d __X, const int __C)
756c5857 9764{
79fb4764
HJ
9765 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
9766 (__v8df)
9767 _mm512_undefined_pd (),
9768 (__mmask8) -1);
756c5857
AI
9769}
9770
/* Masked form: forwards __X and the immediate __C to
   __builtin_ia32_vpermilpd512_mask with __W as the third operand and
   __U as the mask.  */
9771extern __inline __m512d
9772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9773_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
756c5857 9774{
79fb4764
HJ
9775 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
9776 (__v8df) __W,
9777 (__mmask8) __U);
756c5857 9778}
075691af 9779
/* Zero-masked form: forwards __X and the immediate __C to
   __builtin_ia32_vpermilpd512_mask with the result of
   _mm512_setzero_pd () as the third operand and __U as the mask.  */
79fb4764 9780extern __inline __m512d
075691af 9781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9782_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
a7c4d6d1 9783{
79fb4764
HJ
9784 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
9785 (__v8df)
9786 _mm512_setzero_pd (),
9787 (__mmask8) __U);
a7c4d6d1
HL
9788}
9789
/* Unmasked form: forwards __X and the immediate __C to
   __builtin_ia32_vpermilps512_mask with _mm512_undefined_ps () as the
   third operand and an all-ones (__mmask16) -1 mask.  */
79fb4764 9790extern __inline __m512
a7c4d6d1 9791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9792_mm512_permute_ps (__m512 __X, const int __C)
075691af 9793{
79fb4764
HJ
9794 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
9795 (__v16sf)
9796 _mm512_undefined_ps (),
9797 (__mmask16) -1);
a7c4d6d1
HL
9798}
9799
/* Masked form: forwards __X and the immediate __C to
   __builtin_ia32_vpermilps512_mask with __W as the third operand and
   __U as the mask.  */
79fb4764 9800extern __inline __m512
a7c4d6d1 9801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9802_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
a7c4d6d1 9803{
79fb4764
HJ
9804 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
9805 (__v16sf) __W,
9806 (__mmask16) __U);
075691af
AI
9807}
9808
/* Zero-masked form: forwards __X and the immediate __C to
   __builtin_ia32_vpermilps512_mask with the result of
   _mm512_setzero_ps () as the third operand and __U as the mask.  */
79fb4764 9809extern __inline __m512
075691af 9810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9811_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
075691af 9812{
79fb4764
HJ
9813 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
9814 (__v16sf)
9815 _mm512_setzero_ps (),
9816 (__mmask16) __U);
a7c4d6d1 9817}
79fb4764
HJ
9818#else
/* Macro form used when __OPTIMIZE__ is not defined: same builtin and
   operands as the inline function of this name, with C cast to int.  */
9819#define _mm512_permute_pd(X, C) \
9820 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
9821 (__v8df)(__m512d)_mm512_undefined_pd(),\
9822 (__mmask8)(-1)))
9823
/* Macro form of the masked permute: W is the third operand of
   __builtin_ia32_vpermilpd512_mask and U is the mask.  */
9824#define _mm512_mask_permute_pd(W, U, X, C) \
9825 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
9826 (__v8df)(__m512d)(W), \
9827 (__mmask8)(U)))
9828
/* Macro form of the zero-masked permute: the result of
   _mm512_setzero_pd () is the third operand and U is the mask.  */
9829#define _mm512_maskz_permute_pd(U, X, C) \
9830 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
9831 (__v8df)(__m512d)_mm512_setzero_pd(), \
9832 (__mmask8)(U)))
9833
/* Macro form of the unmasked permute: expands to
   __builtin_ia32_vpermilps512_mask with _mm512_undefined_ps () as the
   third operand and an all-ones (__mmask16)(-1) mask.  */
9834#define _mm512_permute_ps(X, C) \
9835 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
9836 (__v16sf)(__m512)_mm512_undefined_ps(),\
9837 (__mmask16)(-1)))
a7c4d6d1 9838
79fb4764
HJ
/* Macro form of the masked permute: W is the third operand of
   __builtin_ia32_vpermilps512_mask and U is the mask.  */
9839#define _mm512_mask_permute_ps(W, U, X, C) \
9840 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
9841 (__v16sf)(__m512)(W), \
9842 (__mmask16)(U)))
9843
/* Macro form of the zero-masked permute: the result of
   _mm512_setzero_ps () is the third operand and U is the mask.  */
9844#define _mm512_maskz_permute_ps(U, X, C) \
9845 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
9846 (__v16sf)(__m512)_mm512_setzero_ps(), \
9847 (__mmask16)(U)))
9848#endif
9849
9850#ifdef __OPTIMIZE__
/* Inline-function form, compiled only when __OPTIMIZE__ is defined.
   Forwards __X and the immediate __I to __builtin_ia32_permdi512_mask
   with _mm512_undefined_epi32 () as the third operand and an all-ones
   (__mmask8) (-1) mask.  */
9851extern __inline __m512i
a7c4d6d1 9852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9853_mm512_permutex_epi64 (__m512i __X, const int __I)
a7c4d6d1 9854{
79fb4764
HJ
9855 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
9856 (__v8di)
9857 _mm512_undefined_epi32 (),
9858 (__mmask8) (-1));
a7c4d6d1
HL
9859}
9860
/* Masked form: forwards __X and the immediate __I to
   __builtin_ia32_permdi512_mask with __W as the third operand and __M
   as the mask.  */
79fb4764 9861extern __inline __m512i
a7c4d6d1 9862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9863_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
9864 __m512i __X, const int __I)
a7c4d6d1 9865{
79fb4764
HJ
9866 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
9867 (__v8di) __W,
9868 (__mmask8) __M);
075691af
AI
9869}
9870
/* Zero-masked form: forwards __X and the immediate __I to
   __builtin_ia32_permdi512_mask with the result of
   _mm512_setzero_si512 () as the third operand and __M as the mask.  */
79fb4764 9871extern __inline __m512i
756c5857 9872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9873_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
756c5857 9874{
79fb4764
HJ
9875 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
9876 (__v8di)
9877 _mm512_setzero_si512 (),
9878 (__mmask8) __M);
756c5857
AI
9879}
9880
/* Unmasked form: forwards __X and the immediate to
   __builtin_ia32_permdf512_mask with _mm512_undefined_pd () as the
   third operand and an all-ones (__mmask8) -1 mask.  Note that here the
   parameter named __M is the permute immediate, not a mask.  */
9881extern __inline __m512d
9882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9883_mm512_permutex_pd (__m512d __X, const int __M)
756c5857 9884{
79fb4764
HJ
9885 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
9886 (__v8df)
9887 _mm512_undefined_pd (),
9888 (__mmask8) -1);
756c5857
AI
9889}
9890
/* Masked form: forwards __X and the immediate __M to
   __builtin_ia32_permdf512_mask with __W as the third operand and __U
   as the mask.  (__M is the immediate here; the mask is __U.)  */
79fb4764 9891extern __inline __m512d
756c5857 9892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9893_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
756c5857 9894{
79fb4764
HJ
9895 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
9896 (__v8df) __W,
9897 (__mmask8) __U);
756c5857
AI
9898}
9899
/* Zero-masked form: forwards __X and the immediate __M to
   __builtin_ia32_permdf512_mask with the result of
   _mm512_setzero_pd () as the third operand and __U as the mask.  */
9900extern __inline __m512d
9901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9902_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
756c5857 9903{
79fb4764
HJ
9904 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
9905 (__v8df)
9906 _mm512_setzero_pd (),
9907 (__mmask8) __U);
756c5857 9908}
79fb4764
HJ
9909#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to
   __builtin_ia32_permdf512_mask with M cast to int as the immediate,
   _mm512_undefined_pd () as the third operand and an all-ones
   (__mmask8)-1 mask.  */
9910#define _mm512_permutex_pd(X, M) \
9911 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
9912 (__v8df)(__m512d)_mm512_undefined_pd(),\
9913 (__mmask8)-1))
756c5857 9914
79fb4764
HJ
/* Macro form of the masked permutex: W is the third operand of
   __builtin_ia32_permdf512_mask, U is the mask and M the immediate.  */
9915#define _mm512_mask_permutex_pd(W, U, X, M) \
9916 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
9917 (__v8df)(__m512d)(W), (__mmask8)(U)))
756c5857 9918
79fb4764
HJ
/* Macro form of the zero-masked permutex: the result of
   _mm512_setzero_pd () is the third operand, U is the mask and M the
   immediate.  */
9919#define _mm512_maskz_permutex_pd(U, X, M) \
9920 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
9921 (__v8df)(__m512d)_mm512_setzero_pd(),\
9922 (__mmask8)(U)))
9923
/* Macro form of the unmasked integer permutex: expands to
   __builtin_ia32_permdi512_mask with I cast to int as the immediate,
   _mm512_undefined_epi32 () as the third operand and an all-ones
   (__mmask8)(-1) mask.  */
9924#define _mm512_permutex_epi64(X, I) \
9925 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
9926 (int)(I), \
9927 (__v8di)(__m512i) \
9928 (_mm512_undefined_epi32 ()),\
9929 (__mmask8)(-1)))
9930
/* Macro form of the zero-masked integer permutex: the result of
   _mm512_setzero_si512 () is the third operand of
   __builtin_ia32_permdi512_mask and M is the mask.  */
9931#define _mm512_maskz_permutex_epi64(M, X, I) \
9932 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
9933 (int)(I), \
9934 (__v8di)(__m512i) \
9935 (_mm512_setzero_si512 ()),\
9936 (__mmask8)(M)))
9937
/* Macro form of the masked integer permutex: W is the third operand of
   __builtin_ia32_permdi512_mask and M is the mask.  */
9938#define _mm512_mask_permutex_epi64(W, M, X, I) \
9939 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
9940 (int)(I), \
9941 (__v8di)(__m512i)(W), \
9942 (__mmask8)(M)))
9943#endif
9944
/* Zero-masked form: calls __builtin_ia32_permvardi512_mask with __Y as
   the first operand and __X as the second -- the reverse of this
   function's parameter order -- then the result of
   _mm512_setzero_si512 () and the mask __M (passed without a cast).  */
9945extern __inline __m512i
756c5857 9946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9947_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
756c5857 9948{
79fb4764
HJ
9949 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
9950 (__v8di) __X,
9951 (__v8di)
9952 _mm512_setzero_si512 (),
9953 __M);
756c5857
AI
9954}
9955
/* Unmasked form: calls __builtin_ia32_permvardi512_mask with __Y as the
   first operand and __X as the second, _mm512_undefined_epi32 () as the
   third and an all-ones (__mmask8) -1 mask.  */
79fb4764 9956extern __inline __m512i
756c5857 9957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9958_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
756c5857 9959{
79fb4764
HJ
9960 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
9961 (__v8di) __X,
9962 (__v8di)
9963 _mm512_undefined_epi32 (),
9964 (__mmask8) -1);
756c5857
AI
9965}
9966
/* Masked form: calls __builtin_ia32_permvardi512_mask with __Y as the
   first operand and __X as the second, __W as the third and the mask
   __M (passed without a cast).  */
79fb4764 9967extern __inline __m512i
756c5857 9968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
9969_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
9970 __m512i __Y)
756c5857 9971{
79fb4764
HJ
9972 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
9973 (__v8di) __X,
9974 (__v8di) __W,
9975 __M);
756c5857
AI
9976}
9977
756c5857
AI
/* Zero-masked form: calls __builtin_ia32_permvarsi512_mask with __Y as
   the first operand and __X as the second, then the result of
   _mm512_setzero_si512 () and the mask __M (passed without a cast).  */
9978extern __inline __m512i
9979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9980_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
756c5857 9981{
79fb4764
HJ
9982 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
9983 (__v16si) __X,
9984 (__v16si)
9985 _mm512_setzero_si512 (),
9986 __M);
756c5857
AI
9987}
9988
/* Unmasked form: calls __builtin_ia32_permvarsi512_mask with __Y as the
   first operand and __X as the second, _mm512_undefined_epi32 () as the
   third and an all-ones (__mmask16) -1 mask.  */
9989extern __inline __m512i
9990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 9991_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
756c5857 9992{
79fb4764
HJ
9993 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
9994 (__v16si) __X,
9995 (__v16si)
9996 _mm512_undefined_epi32 (),
9997 (__mmask16) -1);
756c5857
AI
9998}
9999
/* Masked form: calls __builtin_ia32_permvarsi512_mask with __Y as the
   first operand and __X as the second, __W as the third and the mask
   __M (passed without a cast).  */
10000extern __inline __m512i
10001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10002_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
10003 __m512i __Y)
756c5857 10004{
79fb4764
HJ
10005 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
10006 (__v16si) __X,
10007 (__v16si) __W,
10008 __M);
756c5857
AI
10009}
10010
/* Unmasked form: calls __builtin_ia32_permvardf512_mask with the
   floating-point vector __Y first and the integer vector __X second,
   _mm512_undefined_pd () as the third operand and an all-ones
   (__mmask8) -1 mask.  */
79fb4764 10011extern __inline __m512d
756c5857 10012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10013_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
756c5857 10014{
79fb4764
HJ
10015 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
10016 (__v8di) __X,
10017 (__v8df)
10018 _mm512_undefined_pd (),
10019 (__mmask8) -1);
756c5857
AI
10020}
10021
/* Masked form: calls __builtin_ia32_permvardf512_mask with __Y first
   and the integer vector __X second, __W as the third operand and __U
   as the mask.  */
79fb4764 10022extern __inline __m512d
756c5857 10023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10024_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
756c5857 10025{
79fb4764
HJ
10026 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
10027 (__v8di) __X,
10028 (__v8df) __W,
10029 (__mmask8) __U);
756c5857
AI
10030}
10031
/* Zero-masked form: calls __builtin_ia32_permvardf512_mask with __Y
   first and the integer vector __X second, the result of
   _mm512_setzero_pd () as the third operand and __U as the mask.  */
79fb4764 10032extern __inline __m512d
756c5857 10033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10034_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
756c5857 10035{
79fb4764
HJ
10036 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
10037 (__v8di) __X,
10038 (__v8df)
10039 _mm512_setzero_pd (),
10040 (__mmask8) __U);
756c5857 10041}
756c5857 10042
/* Unmasked form: calls __builtin_ia32_permvarsf512_mask with the
   floating-point vector __Y first and the integer vector __X second,
   _mm512_undefined_ps () as the third operand and an all-ones
   (__mmask16) -1 mask.  */
79fb4764 10043extern __inline __m512
756c5857 10044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10045_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
756c5857 10046{
79fb4764
HJ
10047 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
10048 (__v16si) __X,
10049 (__v16sf)
10050 _mm512_undefined_ps (),
10051 (__mmask16) -1);
756c5857
AI
10052}
10053
/* Masked form: calls __builtin_ia32_permvarsf512_mask with __Y first
   and the integer vector __X second, __W as the third operand and __U
   as the mask.  */
79fb4764 10054extern __inline __m512
756c5857 10055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10056_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
756c5857 10057{
79fb4764
HJ
10058 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
10059 (__v16si) __X,
10060 (__v16sf) __W,
10061 (__mmask16) __U);
756c5857
AI
10062}
10063
/* Zero-masked form: calls __builtin_ia32_permvarsf512_mask with __Y
   first and the integer vector __X second, the result of
   _mm512_setzero_ps () as the third operand and __U as the mask.  */
79fb4764 10064extern __inline __m512
756c5857 10065__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10066_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
756c5857 10067{
79fb4764
HJ
10068 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
10069 (__v16si) __X,
10070 (__v16sf)
10071 _mm512_setzero_ps (),
10072 (__mmask16) __U);
756c5857
AI
10073}
10074
79fb4764
HJ
10075#ifdef __OPTIMIZE__
/* Inline-function form, compiled only when __OPTIMIZE__ is defined.
   Forwards __M, __V and the immediate __imm to
   __builtin_ia32_shufps512_mask with _mm512_undefined_ps () as the
   fourth operand and an all-ones (__mmask16) -1 mask.  Here __M is a
   vector operand, not a mask.  */
10076extern __inline __m512
756c5857 10077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10078_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
756c5857 10079{
79fb4764
HJ
10080 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
10081 (__v16sf) __V, __imm,
10082 (__v16sf)
10083 _mm512_undefined_ps (),
10084 (__mmask16) -1);
756c5857
AI
10085}
10086
/* Masked form: forwards __M, __V and __imm to
   __builtin_ia32_shufps512_mask with __W as the fourth operand and __U
   as the mask.  */
79fb4764 10087extern __inline __m512
756c5857 10088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10089_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
10090 __m512 __V, const int __imm)
756c5857 10091{
79fb4764
HJ
10092 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
10093 (__v16sf) __V, __imm,
10094 (__v16sf) __W,
10095 (__mmask16) __U);
756c5857
AI
10096}
10097
/* Zero-masked form: forwards __M, __V and __imm to
   __builtin_ia32_shufps512_mask with the result of
   _mm512_setzero_ps () as the fourth operand and __U as the mask.  */
79fb4764 10098extern __inline __m512
756c5857 10099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10100_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
756c5857 10101{
79fb4764
HJ
10102 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
10103 (__v16sf) __V, __imm,
10104 (__v16sf)
10105 _mm512_setzero_ps (),
10106 (__mmask16) __U);
756c5857
AI
10107}
10108
/* Unmasked form: forwards __M, __V and the immediate __imm to
   __builtin_ia32_shufpd512_mask with _mm512_undefined_pd () as the
   fourth operand and an all-ones (__mmask8) -1 mask.  Here __M is a
   vector operand, not a mask.  */
79fb4764 10109extern __inline __m512d
756c5857 10110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10111_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
756c5857 10112{
79fb4764
HJ
10113 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
10114 (__v8df) __V, __imm,
10115 (__v8df)
10116 _mm512_undefined_pd (),
10117 (__mmask8) -1);
756c5857
AI
10118}
10119
/* Masked form: forwards __M, __V and __imm to
   __builtin_ia32_shufpd512_mask with __W as the fourth operand and __U
   as the mask.  */
79fb4764 10120extern __inline __m512d
756c5857 10121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10122_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
10123 __m512d __V, const int __imm)
756c5857 10124{
79fb4764
HJ
10125 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
10126 (__v8df) __V, __imm,
10127 (__v8df) __W,
10128 (__mmask8) __U);
756c5857
AI
10129}
10130
/* Zero-masked form: forwards __M, __V and __imm to
   __builtin_ia32_shufpd512_mask with the result of
   _mm512_setzero_pd () as the fourth operand and __U as the mask.  */
79fb4764 10131extern __inline __m512d
d256b866 10132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10133_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
10134 const int __imm)
d256b866 10135{
79fb4764
HJ
10136 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
10137 (__v8df) __V, __imm,
10138 (__v8df)
10139 _mm512_setzero_pd (),
10140 (__mmask8) __U);
d256b866
IT
10141}
10142
/* Unmasked form: forwards __A, __B, the integer vector __C, the
   immediate __imm and the trailing argument __R to
   __builtin_ia32_fixupimmpd512_mask with an all-ones (__mmask8) -1
   mask.
   NOTE(review): __R is presumably a rounding/exception-control
   immediate, judging by the "_round" name -- confirm.  */
79fb4764 10143extern __inline __m512d
275be1da 10144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10145_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
10146 const int __imm, const int __R)
275be1da 10147{
79fb4764
HJ
10148 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
10149 (__v8df) __B,
10150 (__v8di) __C,
10151 __imm,
10152 (__mmask8) -1, __R);
275be1da
IT
10153}
10154
/* Masked form: forwards __A, __B, __C, __imm, the mask __U and __R to
   __builtin_ia32_fixupimmpd512_mask.
   NOTE(review): no separate pass-through operand is supplied;
   presumably elements whose __U bit is clear keep the value of __A --
   confirm against the builtin.  */
79fb4764 10155extern __inline __m512d
275be1da 10156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10157_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
10158 __m512i __C, const int __imm, const int __R)
275be1da 10159{
79fb4764
HJ
10160 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
10161 (__v8df) __B,
10162 (__v8di) __C,
10163 __imm,
10164 (__mmask8) __U, __R);
275be1da
IT
10165}
10166
79fb4764 10167extern __inline __m512d
d256b866 10168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10169_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
10170 __m512i __C, const int __imm, const int __R)
d256b866 10171{
79fb4764
HJ
10172 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
10173 (__v8df) __B,
10174 (__v8di) __C,
10175 __imm,
10176 (__mmask8) __U, __R);
d256b866
IT
10177}
10178
79fb4764 10179extern __inline __m512
275be1da 10180__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10181_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
10182 const int __imm, const int __R)
275be1da 10183{
79fb4764
HJ
10184 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
10185 (__v16sf) __B,
10186 (__v16si) __C,
10187 __imm,
10188 (__mmask16) -1, __R);
275be1da
IT
10189}
10190
79fb4764 10191extern __inline __m512
d256b866 10192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10193_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
10194 __m512i __C, const int __imm, const int __R)
d256b866 10195{
79fb4764
HJ
10196 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
10197 (__v16sf) __B,
10198 (__v16si) __C,
10199 __imm,
10200 (__mmask16) __U, __R);
d256b866
IT
10201}
10202
79fb4764 10203extern __inline __m512
275be1da 10204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10205_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
10206 __m512i __C, const int __imm, const int __R)
275be1da 10207{
79fb4764
HJ
10208 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
10209 (__v16sf) __B,
10210 (__v16si) __C,
10211 __imm,
10212 (__mmask16) __U, __R);
275be1da
IT
10213}
10214
79fb4764
HJ
10215#else
/* Macro forms of the shufpd family.  X and Y are the two sources, C the
   selector; W and U are the extra vector and mask operands.  */
#define _mm512_shuffle_pd(X, Y, C) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_undefined_pd (), \
     (__mmask8) -1))

#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) (W), \
     (__mmask8) (U)))

#define _mm512_maskz_shuffle_pd(U, X, Y, C) \
  ((__m512d) __builtin_ia32_shufpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (int) (C), \
     (__v8df) (__m512d) _mm512_setzero_pd (), \
     (__mmask8) (U)))
10233
/* Macro forms of the shufps family.  X and Y are the two sources, C the
   selector; W and U are the extra vector and mask operands.  */
#define _mm512_shuffle_ps(X, Y, C) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_undefined_ps (), \
     (__mmask16) -1))

#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) (W), \
     (__mmask16) (U)))

#define _mm512_maskz_shuffle_ps(U, X, Y, C) \
  ((__m512) __builtin_ia32_shufps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (int) (C), \
     (__v16sf) (__m512) _mm512_setzero_ps (), \
     (__mmask16) (U)))
10251
/* Macro forms of the fixupimmpd family.  C is the immediate and R the
   rounding argument; the maskz form uses the separate _maskz builtin.  */
#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (-1), (R)))

#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_mask ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (U), (R)))

#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
  ((__m512d) __builtin_ia32_fixupimmpd512_maskz ( \
     (__v8df) (__m512d) (X), \
     (__v8df) (__m512d) (Y), \
     (__v8di) (__m512i) (Z), \
     (int) (C), \
     (__mmask8) (U), (R)))
10266
/* Macro forms of the fixupimmps family.  C is the immediate and R the
   rounding argument; the maskz form uses the separate _maskz builtin.  */
#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (-1), (R)))

#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
  ((__m512) __builtin_ia32_fixupimmps512_mask ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), (R)))

#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
  ((__m512) __builtin_ia32_fixupimmps512_maskz ( \
     (__v16sf) (__m512) (X), \
     (__v16sf) (__m512) (Y), \
     (__v16si) (__m512i) (Z), \
     (int) (C), \
     (__mmask16) (U), (R)))
10281
10282#endif
10283
10284extern __inline __m512
d256b866 10285__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10286_mm512_movehdup_ps (__m512 __A)
d256b866 10287{
79fb4764
HJ
10288 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
10289 (__v16sf)
10290 _mm512_undefined_ps (),
10291 (__mmask16) -1);
d256b866
IT
10292}
10293
79fb4764 10294extern __inline __m512
275be1da 10295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10296_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
275be1da 10297{
79fb4764
HJ
10298 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
10299 (__v16sf) __W,
10300 (__mmask16) __U);
275be1da
IT
10301}
10302
79fb4764 10303extern __inline __m512
d256b866 10304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10305_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
d256b866 10306{
79fb4764
HJ
10307 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
10308 (__v16sf)
10309 _mm512_setzero_ps (),
10310 (__mmask16) __U);
d256b866
IT
10311}
10312
79fb4764 10313extern __inline __m512
275be1da 10314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10315_mm512_moveldup_ps (__m512 __A)
275be1da 10316{
79fb4764
HJ
10317 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
10318 (__v16sf)
10319 _mm512_undefined_ps (),
10320 (__mmask16) -1);
275be1da
IT
10321}
10322
79fb4764 10323extern __inline __m512
d256b866 10324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10325_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
d256b866 10326{
79fb4764
HJ
10327 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
10328 (__v16sf) __W,
10329 (__mmask16) __U);
d256b866
IT
10330}
10331
79fb4764 10332extern __inline __m512
275be1da 10333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10334_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
275be1da 10335{
79fb4764
HJ
10336 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
10337 (__v16sf)
10338 _mm512_setzero_ps (),
10339 (__mmask16) __U);
275be1da
IT
10340}
10341
79fb4764 10342extern __inline __m512i
d256b866 10343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10344_mm512_or_si512 (__m512i __A, __m512i __B)
d256b866 10345{
79fb4764 10346 return (__m512i) ((__v16su) __A | (__v16su) __B);
d256b866
IT
10347}
10348
79fb4764 10349extern __inline __m512i
275be1da 10350__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10351_mm512_or_epi32 (__m512i __A, __m512i __B)
275be1da 10352{
79fb4764 10353 return (__m512i) ((__v16su) __A | (__v16su) __B);
275be1da
IT
10354}
10355
79fb4764 10356extern __inline __m512i
d256b866 10357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10358_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
d256b866 10359{
79fb4764
HJ
10360 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
10361 (__v16si) __B,
10362 (__v16si) __W,
10363 (__mmask16) __U);
d256b866
IT
10364}
10365
79fb4764 10366extern __inline __m512i
275be1da 10367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10368_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
275be1da 10369{
79fb4764
HJ
10370 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
10371 (__v16si) __B,
10372 (__v16si)
10373 _mm512_setzero_si512 (),
10374 (__mmask16) __U);
275be1da
IT
10375}
10376
79fb4764 10377extern __inline __m512i
d256b866 10378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10379_mm512_or_epi64 (__m512i __A, __m512i __B)
d256b866 10380{
79fb4764 10381 return (__m512i) ((__v8du) __A | (__v8du) __B);
d256b866
IT
10382}
10383
79fb4764 10384extern __inline __m512i
275be1da 10385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10386_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
275be1da 10387{
79fb4764
HJ
10388 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
10389 (__v8di) __B,
10390 (__v8di) __W,
10391 (__mmask8) __U);
275be1da
IT
10392}
10393
79fb4764 10394extern __inline __m512i
d256b866 10395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10396_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
d256b866 10397{
79fb4764
HJ
10398 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
10399 (__v8di) __B,
10400 (__v8di)
10401 _mm512_setzero_si512 (),
10402 (__mmask8) __U);
d256b866
IT
10403}
10404
79fb4764 10405extern __inline __m512i
275be1da 10406__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10407_mm512_xor_si512 (__m512i __A, __m512i __B)
275be1da 10408{
79fb4764 10409 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
275be1da
IT
10410}
10411
79fb4764 10412extern __inline __m512i
d256b866 10413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10414_mm512_xor_epi32 (__m512i __A, __m512i __B)
d256b866 10415{
79fb4764 10416 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
d256b866
IT
10417}
10418
79fb4764 10419extern __inline __m512i
275be1da 10420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10421_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
275be1da 10422{
79fb4764
HJ
10423 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
10424 (__v16si) __B,
10425 (__v16si) __W,
10426 (__mmask16) __U);
275be1da
IT
10427}
10428
79fb4764 10429extern __inline __m512i
d256b866 10430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10431_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
d256b866 10432{
79fb4764
HJ
10433 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
10434 (__v16si) __B,
10435 (__v16si)
10436 _mm512_setzero_si512 (),
10437 (__mmask16) __U);
d256b866
IT
10438}
10439
79fb4764 10440extern __inline __m512i
d256b866 10441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10442_mm512_xor_epi64 (__m512i __A, __m512i __B)
d256b866 10443{
79fb4764 10444 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
d256b866
IT
10445}
10446
79fb4764 10447extern __inline __m512i
275be1da 10448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10449_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
275be1da 10450{
79fb4764
HJ
10451 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
10452 (__v8di) __B,
10453 (__v8di) __W,
10454 (__mmask8) __U);
275be1da
IT
10455}
10456
79fb4764 10457extern __inline __m512i
275be1da 10458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10459_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
275be1da 10460{
79fb4764
HJ
10461 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
10462 (__v8di) __B,
10463 (__v8di)
10464 _mm512_setzero_si512 (),
10465 (__mmask8) __U);
275be1da
IT
10466}
10467
79fb4764
HJ
10468#ifdef __OPTIMIZE__
10469extern __inline __m512i
d256b866 10470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10471_mm512_rol_epi32 (__m512i __A, const int __B)
d256b866 10472{
79fb4764
HJ
10473 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
10474 (__v16si)
10475 _mm512_undefined_epi32 (),
10476 (__mmask16) -1);
d256b866
IT
10477}
10478
79fb4764 10479extern __inline __m512i
275be1da 10480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10481_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
275be1da 10482{
79fb4764
HJ
10483 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
10484 (__v16si) __W,
10485 (__mmask16) __U);
275be1da
IT
10486}
10487
79fb4764 10488extern __inline __m512i
d256b866 10489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10490_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
d256b866 10491{
79fb4764
HJ
10492 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
10493 (__v16si)
10494 _mm512_setzero_si512 (),
10495 (__mmask16) __U);
d256b866
IT
10496}
10497
79fb4764 10498extern __inline __m512i
275be1da 10499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10500_mm512_ror_epi32 (__m512i __A, int __B)
275be1da 10501{
79fb4764
HJ
10502 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
10503 (__v16si)
10504 _mm512_undefined_epi32 (),
10505 (__mmask16) -1);
275be1da
IT
10506}
10507
79fb4764 10508extern __inline __m512i
d256b866 10509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10510_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
d256b866 10511{
79fb4764
HJ
10512 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
10513 (__v16si) __W,
10514 (__mmask16) __U);
d256b866
IT
10515}
10516
79fb4764 10517extern __inline __m512i
d8ea3e7c 10518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10519_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
d8ea3e7c 10520{
79fb4764
HJ
10521 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
10522 (__v16si)
10523 _mm512_setzero_si512 (),
10524 (__mmask16) __U);
d8ea3e7c
AS
10525}
10526
79fb4764 10527extern __inline __m512i
d8ea3e7c 10528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10529_mm512_rol_epi64 (__m512i __A, const int __B)
d8ea3e7c 10530{
79fb4764
HJ
10531 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
10532 (__v8di)
10533 _mm512_undefined_epi32 (),
10534 (__mmask8) -1);
d8ea3e7c
AS
10535}
10536
79fb4764 10537extern __inline __m512i
756c5857 10538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10539_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
756c5857 10540{
79fb4764
HJ
10541 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
10542 (__v8di) __W,
10543 (__mmask8) __U);
756c5857
AI
10544}
10545
79fb4764 10546extern __inline __m512i
756c5857 10547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10548_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
756c5857 10549{
79fb4764
HJ
10550 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
10551 (__v8di)
10552 _mm512_setzero_si512 (),
10553 (__mmask8) __U);
756c5857
AI
10554}
10555
79fb4764 10556extern __inline __m512i
756c5857 10557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10558_mm512_ror_epi64 (__m512i __A, int __B)
756c5857 10559{
79fb4764
HJ
10560 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
10561 (__v8di)
10562 _mm512_undefined_epi32 (),
10563 (__mmask8) -1);
756c5857
AI
10564}
10565
79fb4764 10566extern __inline __m512i
756c5857 10567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10568_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
756c5857 10569{
79fb4764
HJ
10570 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
10571 (__v8di) __W,
10572 (__mmask8) __U);
756c5857
AI
10573}
10574
79fb4764 10575extern __inline __m512i
756c5857 10576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10577_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
756c5857 10578{
79fb4764
HJ
10579 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
10580 (__v8di)
10581 _mm512_setzero_si512 (),
10582 (__mmask8) __U);
756c5857
AI
10583}
10584
79fb4764
HJ
10585#else
/* Macro forms of the prold family, used when __OPTIMIZE__ is not
   defined.  A is the source vector and B the count.  */
#define _mm512_rol_epi32(A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) (-1)))
#define _mm512_mask_rol_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
#define _mm512_maskz_rol_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prold512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
/* Macro forms of the prord family, used when __OPTIMIZE__ is not
   defined.  A is the source vector and B the count.  */
#define _mm512_ror_epi32(A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_undefined_epi32 (), \
     (__mmask16) (-1)))
#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) (__m512i) (W), \
     (__mmask16) (U)))
#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i) __builtin_ia32_prord512_mask ( \
     (__v16si) (__m512i) (A), \
     (int) (B), \
     (__v16si) _mm512_setzero_si512 (), \
     (__mmask16) (U)))
/* Macro forms of the prolq family, used when __OPTIMIZE__ is not
   defined.  A is the source vector and B the count.  */
#define _mm512_rol_epi64(A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) (-1)))
#define _mm512_mask_rol_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
#define _mm512_maskz_rol_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prolq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
10631
/* Macro forms of the prorq family, used when __OPTIMIZE__ is not
   defined.  A is the source vector and B the count.  */
#define _mm512_ror_epi64(A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_undefined_epi32 (), \
     (__mmask8) (-1)))
#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) (__m512i) (W), \
     (__mmask8) (U)))
#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i) __builtin_ia32_prorq512_mask ( \
     (__v8di) (__m512i) (A), \
     (int) (B), \
     (__v8di) _mm512_setzero_si512 (), \
     (__mmask8) (U)))
10647#endif
10648
10649extern __inline __m512i
756c5857 10650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10651_mm512_and_si512 (__m512i __A, __m512i __B)
756c5857 10652{
79fb4764 10653 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
10654}
10655
79fb4764 10656extern __inline __m512i
756c5857 10657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10658_mm512_and_epi32 (__m512i __A, __m512i __B)
756c5857 10659{
79fb4764 10660 return (__m512i) ((__v16su) __A & (__v16su) __B);
756c5857
AI
10661}
10662
79fb4764 10663extern __inline __m512i
756c5857 10664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10665_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
756c5857 10666{
79fb4764
HJ
10667 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
10668 (__v16si) __B,
10669 (__v16si) __W,
10670 (__mmask16) __U);
756c5857
AI
10671}
10672
79fb4764 10673extern __inline __m512i
756c5857 10674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10675_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 10676{
79fb4764
HJ
10677 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
10678 (__v16si) __B,
10679 (__v16si)
10680 _mm512_setzero_si512 (),
10681 (__mmask16) __U);
756c5857
AI
10682}
10683
79fb4764 10684extern __inline __m512i
756c5857 10685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10686_mm512_and_epi64 (__m512i __A, __m512i __B)
756c5857 10687{
79fb4764 10688 return (__m512i) ((__v8du) __A & (__v8du) __B);
756c5857
AI
10689}
10690
79fb4764 10691extern __inline __m512i
756c5857 10692__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10693_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857 10694{
79fb4764
HJ
10695 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
10696 (__v8di) __B,
10697 (__v8di) __W, __U);
756c5857
AI
10698}
10699
79fb4764 10700extern __inline __m512i
756c5857 10701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10702_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 10703{
79fb4764
HJ
10704 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
10705 (__v8di) __B,
10706 (__v8di)
10707 _mm512_setzero_pd (),
10708 __U);
756c5857
AI
10709}
10710
79fb4764 10711extern __inline __m512i
756c5857 10712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10713_mm512_andnot_si512 (__m512i __A, __m512i __B)
756c5857 10714{
79fb4764
HJ
10715 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
10716 (__v16si) __B,
10717 (__v16si)
10718 _mm512_undefined_epi32 (),
10719 (__mmask16) -1);
756c5857
AI
10720}
10721
79fb4764 10722extern __inline __m512i
756c5857 10723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10724_mm512_andnot_epi32 (__m512i __A, __m512i __B)
756c5857 10725{
79fb4764
HJ
10726 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
10727 (__v16si) __B,
10728 (__v16si)
10729 _mm512_undefined_epi32 (),
10730 (__mmask16) -1);
756c5857
AI
10731}
10732
79fb4764 10733extern __inline __m512i
756c5857 10734__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10735_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
756c5857 10736{
79fb4764
HJ
10737 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
10738 (__v16si) __B,
10739 (__v16si) __W,
10740 (__mmask16) __U);
756c5857
AI
10741}
10742
79fb4764 10743extern __inline __m512i
756c5857 10744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10745_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 10746{
79fb4764
HJ
10747 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
10748 (__v16si) __B,
10749 (__v16si)
10750 _mm512_setzero_si512 (),
10751 (__mmask16) __U);
756c5857
AI
10752}
10753
79fb4764 10754extern __inline __m512i
756c5857 10755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10756_mm512_andnot_epi64 (__m512i __A, __m512i __B)
756c5857 10757{
79fb4764
HJ
10758 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
10759 (__v8di) __B,
10760 (__v8di)
10761 _mm512_undefined_epi32 (),
10762 (__mmask8) -1);
756c5857
AI
10763}
10764
79fb4764 10765extern __inline __m512i
756c5857 10766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10767_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857 10768{
79fb4764
HJ
10769 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
10770 (__v8di) __B,
10771 (__v8di) __W, __U);
756c5857
AI
10772}
10773
79fb4764 10774extern __inline __m512i
756c5857 10775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10776_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 10777{
79fb4764
HJ
10778 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
10779 (__v8di) __B,
10780 (__v8di)
10781 _mm512_setzero_pd (),
10782 __U);
756c5857
AI
10783}
10784
79fb4764 10785extern __inline __mmask16
756c5857 10786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10787_mm512_test_epi32_mask (__m512i __A, __m512i __B)
756c5857 10788{
79fb4764
HJ
10789 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
10790 (__v16si) __B,
10791 (__mmask16) -1);
756c5857
AI
10792}
10793
79fb4764 10794extern __inline __mmask16
756c5857 10795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10796_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 10797{
79fb4764
HJ
10798 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
10799 (__v16si) __B, __U);
756c5857
AI
10800}
10801
79fb4764 10802extern __inline __mmask8
756c5857 10803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10804_mm512_test_epi64_mask (__m512i __A, __m512i __B)
756c5857 10805{
79fb4764
HJ
10806 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
10807 (__v8di) __B,
10808 (__mmask8) -1);
756c5857
AI
10809}
10810
79fb4764 10811extern __inline __mmask8
756c5857 10812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10813_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 10814{
79fb4764 10815 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
756c5857
AI
10816}
10817
79fb4764 10818extern __inline __mmask16
756c5857 10819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10820_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
756c5857 10821{
79fb4764
HJ
10822 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
10823 (__v16si) __B,
10824 (__mmask16) -1);
756c5857
AI
10825}
10826
79fb4764 10827extern __inline __mmask16
756c5857 10828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10829_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 10830{
79fb4764
HJ
10831 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
10832 (__v16si) __B, __U);
756c5857
AI
10833}
10834
79fb4764 10835extern __inline __mmask8
756c5857 10836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10837_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
756c5857 10838{
79fb4764
HJ
10839 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
10840 (__v8di) __B,
10841 (__mmask8) -1);
756c5857
AI
10842}
10843
79fb4764 10844extern __inline __mmask8
756c5857 10845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10846_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 10847{
79fb4764
HJ
10848 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
10849 (__v8di) __B, __U);
756c5857
AI
10850}
10851
79fb4764 10852extern __inline __m512
756c5857 10853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10854_mm512_abs_ps (__m512 __A)
756c5857 10855{
79fb4764
HJ
10856 return (__m512) _mm512_and_epi32 ((__m512i) __A,
10857 _mm512_set1_epi32 (0x7fffffff));
756c5857
AI
10858}
10859
79fb4764 10860extern __inline __m512
756c5857 10861__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10862_mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
756c5857 10863{
79fb4764
HJ
10864 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
10865 _mm512_set1_epi32 (0x7fffffff));
756c5857
AI
10866}
10867
79fb4764 10868extern __inline __m512d
756c5857 10869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10870_mm512_abs_pd (__m512d __A)
756c5857 10871{
79fb4764
HJ
10872 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
10873 _mm512_set1_epi64 (0x7fffffffffffffffLL));
756c5857
AI
10874}
10875
79fb4764 10876extern __inline __m512d
756c5857 10877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10878_mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
756c5857 10879{
79fb4764
HJ
10880 return (__m512d)
10881 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
10882 _mm512_set1_epi64 (0x7fffffffffffffffLL));
756c5857
AI
10883}
10884
10885extern __inline __m512i
10886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10887_mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
756c5857 10888{
79fb4764
HJ
10889 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
10890 (__v16si) __B,
10891 (__v16si)
10892 _mm512_undefined_epi32 (),
10893 (__mmask16) -1);
756c5857
AI
10894}
10895
79fb4764 10896extern __inline __m512i
756c5857 10897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10898_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
10899 __m512i __B)
756c5857 10900{
79fb4764
HJ
10901 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
10902 (__v16si) __B,
10903 (__v16si) __W,
10904 (__mmask16) __U);
756c5857
AI
10905}
10906
79fb4764 10907extern __inline __m512i
756c5857 10908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10909_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 10910{
79fb4764
HJ
10911 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
10912 (__v16si) __B,
10913 (__v16si)
10914 _mm512_setzero_si512 (),
10915 (__mmask16) __U);
756c5857
AI
10916}
10917
79fb4764 10918extern __inline __m512i
756c5857 10919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10920_mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
756c5857 10921{
79fb4764
HJ
10922 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
10923 (__v8di) __B,
10924 (__v8di)
10925 _mm512_undefined_epi32 (),
10926 (__mmask8) -1);
756c5857
AI
10927}
10928
79fb4764 10929extern __inline __m512i
756c5857 10930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10931_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857 10932{
79fb4764
HJ
10933 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
10934 (__v8di) __B,
10935 (__v8di) __W,
10936 (__mmask8) __U);
756c5857
AI
10937}
10938
79fb4764 10939extern __inline __m512i
756c5857 10940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10941_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 10942{
79fb4764
HJ
10943 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
10944 (__v8di) __B,
10945 (__v8di)
10946 _mm512_setzero_si512 (),
10947 (__mmask8) __U);
756c5857
AI
10948}
10949
79fb4764 10950extern __inline __m512i
756c5857 10951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10952_mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
756c5857 10953{
79fb4764
HJ
10954 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
10955 (__v16si) __B,
10956 (__v16si)
10957 _mm512_undefined_epi32 (),
10958 (__mmask16) -1);
756c5857
AI
10959}
10960
79fb4764 10961extern __inline __m512i
756c5857 10962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
10963_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
10964 __m512i __B)
756c5857 10965{
79fb4764
HJ
10966 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
10967 (__v16si) __B,
10968 (__v16si) __W,
10969 (__mmask16) __U);
756c5857
AI
10970}
10971
79fb4764 10972extern __inline __m512i
756c5857 10973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10974_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 10975{
79fb4764
HJ
10976 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
10977 (__v16si) __B,
10978 (__v16si)
10979 _mm512_setzero_si512 (),
10980 (__mmask16) __U);
756c5857
AI
10981}
10982
79fb4764 10983extern __inline __m512i
756c5857 10984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10985_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
756c5857 10986{
79fb4764
HJ
10987 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
10988 (__v8di) __B,
10989 (__v8di)
10990 _mm512_undefined_epi32 (),
10991 (__mmask8) -1);
756c5857
AI
10992}
10993
79fb4764 10994extern __inline __m512i
756c5857 10995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 10996_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
756c5857 10997{
79fb4764
HJ
10998 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
10999 (__v8di) __B,
11000 (__v8di) __W,
11001 (__mmask8) __U);
756c5857
AI
11002}
11003
79fb4764 11004extern __inline __m512i
756c5857 11005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11006_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 11007{
79fb4764
HJ
11008 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
11009 (__v8di) __B,
11010 (__v8di)
11011 _mm512_setzero_si512 (),
11012 (__mmask8) __U);
756c5857
AI
11013}
11014
79fb4764 11015extern __inline __m512d
756c5857 11016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11017_mm512_movedup_pd (__m512d __A)
756c5857 11018{
79fb4764
HJ
11019 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
11020 (__v8df)
11021 _mm512_undefined_pd (),
11022 (__mmask8) -1);
756c5857
AI
11023}
11024
79fb4764 11025extern __inline __m512d
756c5857 11026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11027_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
756c5857 11028{
79fb4764
HJ
11029 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
11030 (__v8df) __W,
11031 (__mmask8) __U);
756c5857
AI
11032}
11033
79fb4764 11034extern __inline __m512d
756c5857 11035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11036_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
756c5857 11037{
79fb4764
HJ
11038 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
11039 (__v8df)
11040 _mm512_setzero_pd (),
11041 (__mmask8) __U);
756c5857
AI
11042}
11043
79fb4764 11044extern __inline __m512d
756c5857 11045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11046_mm512_unpacklo_pd (__m512d __A, __m512d __B)
756c5857 11047{
79fb4764
HJ
11048 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
11049 (__v8df) __B,
11050 (__v8df)
11051 _mm512_undefined_pd (),
11052 (__mmask8) -1);
756c5857
AI
11053}
11054
79fb4764 11055extern __inline __m512d
756c5857 11056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11057_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
756c5857 11058{
79fb4764
HJ
11059 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
11060 (__v8df) __B,
11061 (__v8df) __W,
11062 (__mmask8) __U);
11063}
756c5857 11064
79fb4764
HJ
11065extern __inline __m512d
11066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11067_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
11068{
11069 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
11070 (__v8df) __B,
11071 (__v8df)
11072 _mm512_setzero_pd (),
11073 (__mmask8) __U);
11074}
756c5857 11075
79fb4764
HJ
11076extern __inline __m512d
11077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11078_mm512_unpackhi_pd (__m512d __A, __m512d __B)
11079{
11080 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
11081 (__v8df) __B,
11082 (__v8df)
11083 _mm512_undefined_pd (),
11084 (__mmask8) -1);
11085}
756c5857
AI
11086
11087extern __inline __m512d
11088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11089_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
756c5857 11090{
79fb4764
HJ
11091 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
11092 (__v8df) __B,
11093 (__v8df) __W,
11094 (__mmask8) __U);
756c5857
AI
11095}
11096
11097extern __inline __m512d
11098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11099_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
756c5857 11100{
79fb4764
HJ
11101 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
11102 (__v8df) __B,
11103 (__v8df)
11104 _mm512_setzero_pd (),
11105 (__mmask8) __U);
756c5857
AI
11106}
11107
79fb4764 11108extern __inline __m512
756c5857 11109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11110_mm512_unpackhi_ps (__m512 __A, __m512 __B)
756c5857 11111{
79fb4764
HJ
11112 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
11113 (__v16sf) __B,
11114 (__v16sf)
11115 _mm512_undefined_ps (),
11116 (__mmask16) -1);
756c5857
AI
11117}
11118
11119extern __inline __m512
11120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11121_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
756c5857 11122{
79fb4764
HJ
11123 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
11124 (__v16sf) __B,
11125 (__v16sf) __W,
11126 (__mmask16) __U);
756c5857
AI
11127}
11128
11129extern __inline __m512
11130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11131_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
756c5857 11132{
79fb4764
HJ
11133 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
11134 (__v16sf) __B,
11135 (__v16sf)
11136 _mm512_setzero_ps (),
11137 (__mmask16) __U);
756c5857
AI
11138}
11139
79fb4764
HJ
11140#ifdef __OPTIMIZE__
11141extern __inline __m512d
756c5857 11142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11143_mm512_cvt_roundps_pd (__m256 __A, const int __R)
756c5857 11144{
79fb4764
HJ
11145 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
11146 (__v8df)
11147 _mm512_undefined_pd (),
11148 (__mmask8) -1, __R);
756c5857
AI
11149}
11150
79fb4764 11151extern __inline __m512d
756c5857 11152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11153_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
11154 const int __R)
756c5857 11155{
79fb4764
HJ
11156 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
11157 (__v8df) __W,
11158 (__mmask8) __U, __R);
756c5857
AI
11159}
11160
79fb4764 11161extern __inline __m512d
756c5857 11162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11163_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
756c5857 11164{
79fb4764
HJ
11165 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
11166 (__v8df)
11167 _mm512_setzero_pd (),
11168 (__mmask8) __U, __R);
756c5857
AI
11169}
11170
79fb4764 11171extern __inline __m512
756c5857 11172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11173_mm512_cvt_roundph_ps (__m256i __A, const int __R)
756c5857 11174{
79fb4764
HJ
11175 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
11176 (__v16sf)
11177 _mm512_undefined_ps (),
11178 (__mmask16) -1, __R);
756c5857
AI
11179}
11180
79fb4764 11181extern __inline __m512
756c5857 11182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11183_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
11184 const int __R)
756c5857 11185{
79fb4764
HJ
11186 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
11187 (__v16sf) __W,
11188 (__mmask16) __U, __R);
756c5857
AI
11189}
11190
79fb4764 11191extern __inline __m512
756c5857 11192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11193_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
756c5857 11194{
79fb4764
HJ
11195 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
11196 (__v16sf)
11197 _mm512_setzero_ps (),
11198 (__mmask16) __U, __R);
756c5857
AI
11199}
11200
79fb4764 11201extern __inline __m256i
756c5857 11202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11203_mm512_cvt_roundps_ph (__m512 __A, const int __I)
756c5857 11204{
79fb4764
HJ
11205 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
11206 __I,
11207 (__v16hi)
11208 _mm256_undefined_si256 (),
11209 -1);
756c5857
AI
11210}
11211
79fb4764 11212extern __inline __m256i
756c5857 11213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11214_mm512_cvtps_ph (__m512 __A, const int __I)
756c5857 11215{
79fb4764
HJ
11216 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
11217 __I,
11218 (__v16hi)
11219 _mm256_undefined_si256 (),
11220 -1);
756c5857
AI
11221}
11222
79fb4764 11223extern __inline __m256i
756c5857 11224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11225_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
11226 const int __I)
756c5857 11227{
79fb4764
HJ
11228 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
11229 __I,
11230 (__v16hi) __U,
11231 (__mmask16) __W);
756c5857
AI
11232}
11233
79fb4764
HJ
11234extern __inline __m256i
11235__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11236_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
11237{
11238 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
11239 __I,
11240 (__v16hi) __U,
11241 (__mmask16) __W);
11242}
11243
11244extern __inline __m256i
11245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11246_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
11247{
11248 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
11249 __I,
11250 (__v16hi)
11251 _mm256_setzero_si256 (),
11252 (__mmask16) __W);
11253}
11254
11255extern __inline __m256i
11256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11257_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
11258{
11259 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
11260 __I,
11261 (__v16hi)
11262 _mm256_setzero_si256 (),
11263 (__mmask16) __W);
11264}
11265#else
/* Macro forms of the conversion intrinsics, used when __OPTIMIZE__ is
   not defined.  Every expansion is wrapped in an outer pair of
   parentheses so that a postfix operator written after the macro
   applies to the cast result rather than to the builtin call, and every
   macro argument is parenthesized and cast through its public type, in
   line with the ps_ph macros below.  */
#define _mm512_cvt_roundps_pd(A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256) (A), \
    (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256) (A), \
    (__v8df)(__m512d) (W), (__mmask8) (U), (B)))

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
  ((__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf)(__m256) (A), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (B)))

#define _mm512_cvt_roundph_ps(A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i) (A), \
    (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (B)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i) (A), \
    (__v16sf)(__m512) (W), (__mmask16) (U), (B)))

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
  ((__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi)(__m256i) (A), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (B)))

/* In the ps_ph macros U names the 256-bit vector operand and W the
   mask, mirroring the inline-function parameter names.  */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
11302#endif
11303
11304#ifdef __OPTIMIZE__
11305extern __inline __m256
756c5857 11306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11307_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
756c5857 11308{
79fb4764
HJ
11309 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
11310 (__v8sf)
11311 _mm256_undefined_ps (),
11312 (__mmask8) -1, __R);
756c5857
AI
11313}
11314
79fb4764 11315extern __inline __m256
756c5857 11316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11317_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
11318 const int __R)
756c5857 11319{
79fb4764
HJ
11320 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
11321 (__v8sf) __W,
11322 (__mmask8) __U, __R);
756c5857
AI
11323}
11324
79fb4764 11325extern __inline __m256
756c5857 11326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11327_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
756c5857 11328{
79fb4764
HJ
11329 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
11330 (__v8sf)
11331 _mm256_setzero_ps (),
11332 (__mmask8) __U, __R);
756c5857
AI
11333}
11334
79fb4764
HJ
11335#else
/* Macro forms of the pd -> ps conversions, used when __OPTIMIZE__ is
   not defined.  Each expansion is fully parenthesized so it behaves as
   a single primary expression, and each argument is parenthesized and
   cast through its public type before reaching the builtin.  */
#define _mm512_cvt_roundpd_ps(A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d) (A), \
    (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, (B)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d) (A), \
    (__v8sf)(__m256) (W), (__mmask8) (U), (B)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
  ((__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df)(__m512d) (A), \
    (__v8sf) _mm256_setzero_ps (), (__mmask8) (U), (B)))
11344
11345#endif
11346
11347extern __inline void
756c5857 11348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11349_mm512_stream_si512 (__m512i * __P, __m512i __A)
756c5857 11350{
79fb4764 11351 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
756c5857
AI
11352}
11353
79fb4764 11354extern __inline void
756c5857 11355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11356_mm512_stream_ps (float *__P, __m512 __A)
756c5857 11357{
79fb4764 11358 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
756c5857
AI
11359}
11360
79fb4764 11361extern __inline void
756c5857 11362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11363_mm512_stream_pd (double *__P, __m512d __A)
756c5857 11364{
79fb4764 11365 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
756c5857
AI
11366}
11367
11368extern __inline __m512i
11369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11370_mm512_stream_load_si512 (void *__P)
756c5857 11371{
79fb4764 11372 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
756c5857
AI
11373}
11374
79fb4764
HJ
11375#ifdef __OPTIMIZE__
11376extern __inline __m512
756c5857 11377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11378_mm512_getexp_round_ps (__m512 __A, const int __R)
756c5857 11379{
79fb4764
HJ
11380 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
11381 (__v16sf)
11382 _mm512_undefined_ps (),
11383 (__mmask16) -1, __R);
756c5857
AI
11384}
11385
79fb4764 11386extern __inline __m512
756c5857 11387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11388_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
11389 const int __R)
756c5857 11390{
79fb4764
HJ
11391 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
11392 (__v16sf) __W,
11393 (__mmask16) __U, __R);
756c5857
AI
11394}
11395
79fb4764 11396extern __inline __m512
756c5857 11397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11398_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
756c5857 11399{
79fb4764
HJ
11400 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
11401 (__v16sf)
11402 _mm512_setzero_ps (),
11403 (__mmask16) __U, __R);
756c5857
AI
11404}
11405
79fb4764 11406extern __inline __m512d
756c5857 11407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11408_mm512_getexp_round_pd (__m512d __A, const int __R)
756c5857 11409{
79fb4764
HJ
11410 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
11411 (__v8df)
11412 _mm512_undefined_pd (),
11413 (__mmask8) -1, __R);
756c5857
AI
11414}
11415
79fb4764 11416extern __inline __m512d
756c5857 11417__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11418_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
11419 const int __R)
756c5857 11420{
79fb4764
HJ
11421 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
11422 (__v8df) __W,
11423 (__mmask8) __U, __R);
756c5857
AI
11424}
11425
79fb4764 11426extern __inline __m512d
756c5857 11427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11428_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
756c5857 11429{
79fb4764
HJ
11430 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
11431 (__v8df)
11432 _mm512_setzero_pd (),
11433 (__mmask8) __U, __R);
756c5857
AI
11434}
11435
79fb4764 11436extern __inline __m512d
756c5857 11437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11438_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
11439 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
756c5857 11440{
79fb4764
HJ
11441 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
11442 (__C << 2) | __B,
11443 _mm512_undefined_pd (),
11444 (__mmask8) -1, __R);
756c5857
AI
11445}
11446
79fb4764 11447extern __inline __m512d
dea06111 11448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11449_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
11450 _MM_MANTISSA_NORM_ENUM __B,
11451 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
dea06111 11452{
79fb4764
HJ
11453 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
11454 (__C << 2) | __B,
11455 (__v8df) __W, __U,
11456 __R);
dea06111
AS
11457}
11458
79fb4764 11459extern __inline __m512d
dea06111 11460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11461_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
11462 _MM_MANTISSA_NORM_ENUM __B,
11463 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
dea06111 11464{
79fb4764
HJ
11465 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
11466 (__C << 2) | __B,
11467 (__v8df)
11468 _mm512_setzero_pd (),
11469 __U, __R);
dea06111
AS
11470}
11471
79fb4764 11472extern __inline __m512
dea06111 11473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11474_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
11475 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
dea06111 11476{
79fb4764
HJ
11477 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
11478 (__C << 2) | __B,
11479 _mm512_undefined_ps (),
11480 (__mmask16) -1, __R);
dea06111
AS
11481}
11482
79fb4764 11483extern __inline __m512
7cdb6e4c 11484__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11485_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
11486 _MM_MANTISSA_NORM_ENUM __B,
11487 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
7cdb6e4c 11488{
79fb4764
HJ
11489 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
11490 (__C << 2) | __B,
11491 (__v16sf) __W, __U,
11492 __R);
7cdb6e4c
AS
11493}
11494
79fb4764 11495extern __inline __m512
7cdb6e4c 11496__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11497_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
11498 _MM_MANTISSA_NORM_ENUM __B,
11499 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
7cdb6e4c 11500{
79fb4764
HJ
11501 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
11502 (__C << 2) | __B,
11503 (__v16sf)
11504 _mm512_setzero_ps (),
11505 __U, __R);
7cdb6e4c
AS
11506}
11507
79fb4764
HJ
11508#else
/* Macro forms of the getmant intrinsics, used when __OPTIMIZE__ is not
   defined.  X is the input vector, B and C are packed into the
   builtin's second operand as ((C) << 2) | (B), and R is passed as the
   last operand.  */
#define _mm512_getmant_round_pd(X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8df)(__m512d) _mm512_undefined_pd (), \
    (__mmask8) -1, (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8df)(__m512d) (W), \
    (__mmask8) (U), (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
  ((__m512d) __builtin_ia32_getmantpd512_mask ((__v8df)(__m512d) (X), \
    (int) (((C) << 2) | (B)), \
    (__v8df)(__m512d) _mm512_setzero_pd (), \
    (__mmask8) (U), (R)))

#define _mm512_getmant_round_ps(X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512) (X), \
    (int) (((C) << 2) | (B)), \
    (__v16sf)(__m512) _mm512_undefined_ps (), \
    (__mmask16) -1, (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512) (X), \
    (int) (((C) << 2) | (B)), \
    (__v16sf)(__m512) (W), \
    (__mmask16) (U), (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
  ((__m512) __builtin_ia32_getmantps512_mask ((__v16sf)(__m512) (X), \
    (int) (((C) << 2) | (B)), \
    (__v16sf)(__m512) _mm512_setzero_ps (), \
    (__mmask16) (U), (R)))
7cdb6e4c 11549
79fb4764
HJ
/* Macro forms of the getexp intrinsics, used when __OPTIMIZE__ is not
   defined.  A is the input vector; R is passed through as the builtin's
   last operand.  */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512) (A), \
    (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, R))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512) (A), \
    (__v16sf)(__m512) (W), (__mmask16) (U), R))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512) __builtin_ia32_getexpps512_mask ((__v16sf)(__m512) (A), \
    (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), R))

#define _mm512_getexp_round_pd(A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d) (A), \
    (__v8df) _mm512_undefined_pd (), (__mmask8) -1, R))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d) (A), \
    (__v8df)(__m512d) (W), (__mmask8) (U), R))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d) __builtin_ia32_getexppd512_mask ((__v8df)(__m512d) (A), \
    (__v8df) _mm512_setzero_pd (), (__mmask8) (U), R))
11573#endif
756c5857 11574
79fb4764
HJ
11575#ifdef __OPTIMIZE__
11576extern __inline __m512
756c5857 11577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11578_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
756c5857 11579{
79fb4764
HJ
11580 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
11581 (__v16sf)
11582 _mm512_undefined_ps (),
11583 -1, __R);
756c5857
AI
11584}
11585
79fb4764 11586extern __inline __m512
756c5857 11587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11588_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
11589 const int __imm, const int __R)
756c5857 11590{
79fb4764
HJ
11591 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
11592 (__v16sf) __A,
11593 (__mmask16) __B, __R);
756c5857
AI
11594}
11595
79fb4764 11596extern __inline __m512
756c5857 11597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11598_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
11599 const int __imm, const int __R)
756c5857 11600{
79fb4764
HJ
11601 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
11602 __imm,
11603 (__v16sf)
11604 _mm512_setzero_ps (),
11605 (__mmask16) __A, __R);
756c5857
AI
11606}
11607
79fb4764 11608extern __inline __m512d
756c5857 11609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11610_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
756c5857 11611{
79fb4764
HJ
11612 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
11613 (__v8df)
11614 _mm512_undefined_pd (),
11615 -1, __R);
756c5857
AI
11616}
11617
79fb4764 11618extern __inline __m512d
6901ea62 11619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11620_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
11621 __m512d __C, const int __imm, const int __R)
6901ea62 11622{
79fb4764
HJ
11623 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
11624 (__v8df) __A,
11625 (__mmask8) __B, __R);
6901ea62
AS
11626}
11627
79fb4764 11628extern __inline __m512d
756c5857 11629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11630_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
11631 const int __imm, const int __R)
756c5857 11632{
79fb4764
HJ
11633 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
11634 __imm,
11635 (__v8df)
11636 _mm512_setzero_pd (),
11637 (__mmask8) __A, __R);
756c5857
AI
11638}
11639
79fb4764
HJ
11640#else
/* Macro forms of the roundscale intrinsics, used when __OPTIMIZE__ is
   not defined.  Argument letters follow the inline functions: in the
   mask forms A is the third-operand vector, B the mask, C the input and
   D the immediate; in the maskz forms A is the mask, B the input and C
   the immediate.  */
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512) (A), \
    (int) (B), (__v16sf) _mm512_undefined_ps (), (__mmask16) (-1), R))

#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512) (C), \
    (int) (D), (__v16sf)(__m512) (A), (__mmask16) (B), R))

#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512) (B), \
    (int) (C), (__v16sf) _mm512_setzero_ps (), (__mmask16) (A), R))

#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d) (A), \
    (int) (B), (__v8df) _mm512_undefined_pd (), (__mmask8) (-1), R))

#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d) (C), \
    (int) (D), (__v8df)(__m512d) (A), (__mmask8) (B), R))

#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d) (B), \
    (int) (C), (__v8df) _mm512_setzero_pd (), (__mmask8) (A), R))
11667#endif
11668
756c5857
AI
11669extern __inline __m512
11670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11671_mm512_floor_ps (__m512 __A)
756c5857 11672{
79fb4764
HJ
11673 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
11674 _MM_FROUND_FLOOR,
11675 (__v16sf) __A, -1,
11676 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
11677}
11678
79fb4764 11679extern __inline __m512d
756c5857 11680__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11681_mm512_floor_pd (__m512d __A)
756c5857 11682{
79fb4764
HJ
11683 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
11684 _MM_FROUND_FLOOR,
11685 (__v8df) __A, -1,
11686 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
11687}
11688
11689extern __inline __m512
11690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11691_mm512_ceil_ps (__m512 __A)
756c5857 11692{
79fb4764
HJ
11693 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
11694 _MM_FROUND_CEIL,
11695 (__v16sf) __A, -1,
11696 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
11697}
11698
79fb4764 11699extern __inline __m512d
756c5857 11700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11701_mm512_ceil_pd (__m512d __A)
756c5857 11702{
79fb4764
HJ
11703 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
11704 _MM_FROUND_CEIL,
11705 (__v8df) __A, -1,
11706 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
11707}
11708
79fb4764 11709extern __inline __m512
756c5857 11710__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11711_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
756c5857 11712{
79fb4764
HJ
11713 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
11714 _MM_FROUND_FLOOR,
11715 (__v16sf) __W, __U,
11716 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
11717}
11718
79fb4764 11719extern __inline __m512d
756c5857 11720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11721_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
756c5857 11722{
79fb4764
HJ
11723 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
11724 _MM_FROUND_FLOOR,
11725 (__v8df) __W, __U,
11726 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
11727}
11728
79fb4764 11729extern __inline __m512
756c5857 11730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11731_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
756c5857 11732{
79fb4764
HJ
11733 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
11734 _MM_FROUND_CEIL,
11735 (__v16sf) __W, __U,
11736 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
11737}
11738
79fb4764 11739extern __inline __m512d
756c5857 11740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11741_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
756c5857 11742{
79fb4764
HJ
11743 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
11744 _MM_FROUND_CEIL,
11745 (__v8df) __W, __U,
11746 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
11747}
11748
79fb4764 11749#ifdef __OPTIMIZE__
756c5857
AI
11750extern __inline __m512i
11751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11752_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
756c5857 11753{
79fb4764
HJ
11754 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
11755 (__v16si) __B, __imm,
11756 (__v16si)
4271e5cb 11757 _mm512_undefined_epi32 (),
79fb4764 11758 (__mmask16) -1);
756c5857
AI
11759}
11760
11761extern __inline __m512i
11762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11763_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
11764 __m512i __B, const int __imm)
756c5857 11765{
79fb4764
HJ
11766 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
11767 (__v16si) __B, __imm,
11768 (__v16si) __W,
11769 (__mmask16) __U);
756c5857
AI
11770}
11771
11772extern __inline __m512i
11773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11774_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
11775 const int __imm)
756c5857 11776{
79fb4764
HJ
11777 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
11778 (__v16si) __B, __imm,
11779 (__v16si)
11780 _mm512_setzero_si512 (),
11781 (__mmask16) __U);
756c5857
AI
11782}
11783
11784extern __inline __m512i
11785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11786_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
756c5857 11787{
79fb4764
HJ
11788 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
11789 (__v8di) __B, __imm,
756c5857 11790 (__v8di)
4271e5cb 11791 _mm512_undefined_epi32 (),
756c5857
AI
11792 (__mmask8) -1);
11793}
11794
11795extern __inline __m512i
11796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11797_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
11798 __m512i __B, const int __imm)
756c5857 11799{
79fb4764
HJ
11800 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
11801 (__v8di) __B, __imm,
11802 (__v8di) __W,
11803 (__mmask8) __U);
756c5857
AI
11804}
11805
11806extern __inline __m512i
11807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
11808_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
11809 const int __imm)
756c5857 11810{
79fb4764
HJ
11811 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
11812 (__v8di) __B, __imm,
756c5857
AI
11813 (__v8di)
11814 _mm512_setzero_si512 (),
79fb4764 11815 (__mmask8) __U);
756c5857 11816}
79fb4764
HJ
11817#else
/* Macro forms of the alignr intrinsics for the #else branch.  Each expands
   to the same builtin call, with the same operand casts, as the inline
   function of the same name.  NOTE(review): presumably macros are used
   here so that C reaches the builtin as a constant expression when the
   inline functions are not available -- confirm against the enclosing
   #if condition.  */
#define _mm512_alignr_epi32(X, Y, C)                                        \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
        (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
        (__mmask16)-1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C)                             \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
        (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W),             \
        (__mmask16)(U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C)                               \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
        (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),  \
        (__mmask16)(U)))

#define _mm512_alignr_epi64(X, Y, C)                                        \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
        (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (),  \
        (__mmask8)-1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C)                             \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
        (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C)                               \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
        (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),    \
        (__mmask8)(U)))
11846#endif
11847
11848extern __inline __mmask16
756c5857 11849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11850_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
756c5857 11851{
79fb4764
HJ
11852 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
11853 (__v16si) __B,
11854 (__mmask16) -1);
756c5857
AI
11855}
11856
79fb4764 11857extern __inline __mmask16
756c5857 11858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11859_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 11860{
79fb4764
HJ
11861 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
11862 (__v16si) __B, __U);
756c5857
AI
11863}
11864
79fb4764 11865extern __inline __mmask8
756c5857 11866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11867_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 11868{
79fb4764
HJ
11869 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
11870 (__v8di) __B, __U);
756c5857
AI
11871}
11872
79fb4764 11873extern __inline __mmask8
756c5857 11874__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11875_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
756c5857 11876{
79fb4764
HJ
11877 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
11878 (__v8di) __B,
11879 (__mmask8) -1);
756c5857
AI
11880}
11881
79fb4764 11882extern __inline __mmask16
756c5857 11883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11884_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
756c5857 11885{
79fb4764
HJ
11886 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
11887 (__v16si) __B,
11888 (__mmask16) -1);
756c5857
AI
11889}
11890
79fb4764 11891extern __inline __mmask16
756c5857 11892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11893_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
756c5857 11894{
79fb4764
HJ
11895 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
11896 (__v16si) __B, __U);
756c5857
AI
11897}
11898
79fb4764 11899extern __inline __mmask8
756c5857 11900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11901_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
756c5857 11902{
79fb4764
HJ
11903 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
11904 (__v8di) __B, __U);
756c5857
AI
11905}
11906
79fb4764 11907extern __inline __mmask8
756c5857 11908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11909_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
756c5857 11910{
79fb4764
HJ
11911 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
11912 (__v8di) __B,
11913 (__mmask8) -1);
756c5857
AI
11914}
11915
79fb4764 11916extern __inline __mmask16
756c5857 11917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11918_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
756c5857 11919{
79fb4764
HJ
11920 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
11921 (__v16si) __Y, 5,
11922 (__mmask16) -1);
756c5857
AI
11923}
11924
79fb4764 11925extern __inline __mmask16
756c5857 11926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11927_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
756c5857 11928{
79fb4764
HJ
11929 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
11930 (__v16si) __Y, 5,
11931 (__mmask16) __M);
756c5857
AI
11932}
11933
79fb4764 11934extern __inline __mmask16
756c5857 11935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11936_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
756c5857 11937{
79fb4764
HJ
11938 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
11939 (__v16si) __Y, 5,
11940 (__mmask16) __M);
756c5857
AI
11941}
11942
79fb4764 11943extern __inline __mmask16
756c5857 11944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11945_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
756c5857 11946{
79fb4764
HJ
11947 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
11948 (__v16si) __Y, 5,
11949 (__mmask16) -1);
756c5857
AI
11950}
11951
79fb4764 11952extern __inline __mmask8
756c5857 11953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11954_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
756c5857 11955{
79fb4764
HJ
11956 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
11957 (__v8di) __Y, 5,
11958 (__mmask8) __M);
756c5857
AI
11959}
11960
79fb4764 11961extern __inline __mmask8
075691af 11962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11963_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
075691af 11964{
79fb4764
HJ
11965 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
11966 (__v8di) __Y, 5,
11967 (__mmask8) -1);
075691af
AI
11968}
11969
79fb4764 11970extern __inline __mmask8
f4ee3a9e 11971__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11972_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
f4ee3a9e 11973{
79fb4764
HJ
11974 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
11975 (__v8di) __Y, 5,
11976 (__mmask8) __M);
f4ee3a9e
UB
11977}
11978
79fb4764 11979extern __inline __mmask8
f4ee3a9e 11980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11981_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
f4ee3a9e 11982{
79fb4764
HJ
11983 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
11984 (__v8di) __Y, 5,
11985 (__mmask8) -1);
f4ee3a9e
UB
11986}
11987
79fb4764 11988extern __inline __mmask16
075691af 11989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11990_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
075691af 11991{
79fb4764
HJ
11992 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
11993 (__v16si) __Y, 2,
11994 (__mmask16) __M);
075691af
AI
11995}
11996
79fb4764 11997extern __inline __mmask16
f4ee3a9e 11998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 11999_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
f4ee3a9e 12000{
79fb4764
HJ
12001 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12002 (__v16si) __Y, 2,
12003 (__mmask16) -1);
f4ee3a9e
UB
12004}
12005
79fb4764 12006extern __inline __mmask16
f4ee3a9e 12007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12008_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
f4ee3a9e 12009{
79fb4764
HJ
12010 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12011 (__v16si) __Y, 2,
12012 (__mmask16) __M);
f4ee3a9e
UB
12013}
12014
79fb4764 12015extern __inline __mmask16
075691af 12016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12017_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
075691af 12018{
79fb4764
HJ
12019 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12020 (__v16si) __Y, 2,
12021 (__mmask16) -1);
075691af
AI
12022}
12023
79fb4764 12024extern __inline __mmask8
f4ee3a9e 12025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12026_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
f4ee3a9e 12027{
79fb4764
HJ
12028 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12029 (__v8di) __Y, 2,
12030 (__mmask8) __M);
f4ee3a9e
UB
12031}
12032
79fb4764 12033extern __inline __mmask8
f4ee3a9e 12034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12035_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
f4ee3a9e 12036{
79fb4764
HJ
12037 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12038 (__v8di) __Y, 2,
12039 (__mmask8) -1);
f4ee3a9e
UB
12040}
12041
79fb4764 12042extern __inline __mmask8
075691af 12043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12044_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
075691af 12045{
79fb4764
HJ
12046 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12047 (__v8di) __Y, 2,
12048 (__mmask8) __M);
075691af
AI
12049}
12050
79fb4764 12051extern __inline __mmask8
f4ee3a9e 12052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12053_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
f4ee3a9e 12054{
79fb4764
HJ
12055 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12056 (__v8di) __Y, 2,
12057 (__mmask8) -1);
f4ee3a9e
UB
12058}
12059
79fb4764 12060extern __inline __mmask16
f4ee3a9e 12061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12062_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
f4ee3a9e 12063{
79fb4764
HJ
12064 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12065 (__v16si) __Y, 1,
12066 (__mmask16) __M);
f4ee3a9e
UB
12067}
12068
79fb4764 12069extern __inline __mmask16
756c5857 12070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12071_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
756c5857 12072{
79fb4764
HJ
12073 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12074 (__v16si) __Y, 1,
12075 (__mmask16) -1);
756c5857
AI
12076}
12077
79fb4764 12078extern __inline __mmask16
756c5857 12079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12080_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
756c5857 12081{
79fb4764
HJ
12082 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12083 (__v16si) __Y, 1,
12084 (__mmask16) __M);
756c5857
AI
12085}
12086
79fb4764 12087extern __inline __mmask16
756c5857 12088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12089_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
756c5857 12090{
79fb4764
HJ
12091 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12092 (__v16si) __Y, 1,
12093 (__mmask16) -1);
756c5857
AI
12094}
12095
79fb4764 12096extern __inline __mmask8
756c5857 12097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12098_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
756c5857 12099{
79fb4764
HJ
12100 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12101 (__v8di) __Y, 1,
12102 (__mmask8) __M);
756c5857
AI
12103}
12104
79fb4764 12105extern __inline __mmask8
075691af 12106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12107_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
075691af 12108{
79fb4764
HJ
12109 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12110 (__v8di) __Y, 1,
12111 (__mmask8) -1);
075691af
AI
12112}
12113
79fb4764 12114extern __inline __mmask8
075691af 12115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12116_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
075691af 12117{
79fb4764
HJ
12118 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12119 (__v8di) __Y, 1,
12120 (__mmask8) __M);
075691af
AI
12121}
12122
79fb4764 12123extern __inline __mmask8
075691af 12124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12125_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
075691af 12126{
79fb4764
HJ
12127 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12128 (__v8di) __Y, 1,
12129 (__mmask8) -1);
075691af
AI
12130}
12131
79fb4764 12132extern __inline __mmask16
075691af 12133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12134_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
075691af 12135{
79fb4764
HJ
12136 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12137 (__v16si) __Y, 4,
12138 (__mmask16) -1);
075691af
AI
12139}
12140
79fb4764 12141extern __inline __mmask16
075691af 12142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12143_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
075691af 12144{
79fb4764
HJ
12145 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12146 (__v16si) __Y, 4,
12147 (__mmask16) __M);
075691af
AI
12148}
12149
79fb4764 12150extern __inline __mmask16
075691af 12151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12152_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
075691af 12153{
79fb4764
HJ
12154 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12155 (__v16si) __Y, 4,
12156 (__mmask16) __M);
075691af
AI
12157}
12158
79fb4764 12159extern __inline __mmask16
075691af 12160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12161_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
075691af 12162{
79fb4764
HJ
12163 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12164 (__v16si) __Y, 4,
12165 (__mmask16) -1);
075691af
AI
12166}
12167
79fb4764 12168extern __inline __mmask8
075691af 12169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12170_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
075691af 12171{
79fb4764
HJ
12172 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12173 (__v8di) __Y, 4,
12174 (__mmask8) __M);
075691af 12175}
075691af 12176
79fb4764 12177extern __inline __mmask8
5c4ade6d 12178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12179_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
5c4ade6d 12180{
79fb4764
HJ
12181 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12182 (__v8di) __Y, 4,
12183 (__mmask8) -1);
5c4ade6d
JJ
12184}
12185
79fb4764 12186extern __inline __mmask8
5c4ade6d 12187__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12188_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
5c4ade6d 12189{
79fb4764
HJ
12190 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12191 (__v8di) __Y, 4,
12192 (__mmask8) __M);
5c4ade6d
JJ
12193}
12194
79fb4764 12195extern __inline __mmask8
5c4ade6d 12196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12197_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
5c4ade6d 12198{
79fb4764
HJ
12199 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12200 (__v8di) __Y, 4,
12201 (__mmask8) -1);
5c4ade6d
JJ
12202}
12203
79fb4764
HJ
/* Predicate immediates for the integer compare builtins.  The fixed
   compare wrappers in this header pass the same values as literals
   (1, 2, 4 and 5).  NLT/GE share one encoding, as do NLE/GT.  */
#define _MM_CMPINT_EQ       0x0
#define _MM_CMPINT_LT       0x1
#define _MM_CMPINT_LE       0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE       0x4
#define _MM_CMPINT_NLT      0x5
#define _MM_CMPINT_GE       0x5
#define _MM_CMPINT_NLE      0x6
#define _MM_CMPINT_GT       0x6
12213
12214#ifdef __OPTIMIZE__
12215extern __inline __mmask8
5c4ade6d 12216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12217_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
5c4ade6d 12218{
79fb4764
HJ
12219 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12220 (__v8di) __Y, __P,
12221 (__mmask8) -1);
5c4ade6d
JJ
12222}
12223
79fb4764 12224extern __inline __mmask16
5c4ade6d 12225__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12226_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
5c4ade6d 12227{
79fb4764
HJ
12228 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12229 (__v16si) __Y, __P,
12230 (__mmask16) -1);
5c4ade6d
JJ
12231}
12232
79fb4764 12233extern __inline __mmask8
5c4ade6d 12234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12235_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
5c4ade6d 12236{
79fb4764
HJ
12237 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12238 (__v8di) __Y, __P,
12239 (__mmask8) -1);
5c4ade6d
JJ
12240}
12241
79fb4764 12242extern __inline __mmask16
5c4ade6d 12243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12244_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
5c4ade6d 12245{
79fb4764
HJ
12246 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12247 (__v16si) __Y, __P,
12248 (__mmask16) -1);
5c4ade6d
JJ
12249}
12250
79fb4764 12251extern __inline __mmask8
5c4ade6d 12252__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12253_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
12254 const int __R)
5c4ade6d 12255{
79fb4764
HJ
12256 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12257 (__v8df) __Y, __P,
12258 (__mmask8) -1, __R);
5c4ade6d
JJ
12259}
12260
79fb4764 12261extern __inline __mmask16
5c4ade6d 12262__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12263_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
5c4ade6d 12264{
79fb4764
HJ
12265 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12266 (__v16sf) __Y, __P,
12267 (__mmask16) -1, __R);
5c4ade6d
JJ
12268}
12269
79fb4764 12270extern __inline __mmask8
5c4ade6d 12271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12272_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
12273 const int __P)
5c4ade6d 12274{
79fb4764
HJ
12275 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12276 (__v8di) __Y, __P,
12277 (__mmask8) __U);
5c4ade6d
JJ
12278}
12279
79fb4764 12280extern __inline __mmask16
5c4ade6d 12281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12282_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
12283 const int __P)
12284{
12285 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12286 (__v16si) __Y, __P,
12287 (__mmask16) __U);
5c4ade6d
JJ
12288}
12289
79fb4764 12290extern __inline __mmask8
5c4ade6d 12291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12292_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
12293 const int __P)
5c4ade6d 12294{
79fb4764
HJ
12295 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12296 (__v8di) __Y, __P,
12297 (__mmask8) __U);
5c4ade6d
JJ
12298}
12299
79fb4764 12300extern __inline __mmask16
5c4ade6d 12301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12302_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
12303 const int __P)
5c4ade6d 12304{
79fb4764
HJ
12305 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12306 (__v16si) __Y, __P,
12307 (__mmask16) __U);
5c4ade6d
JJ
12308}
12309
79fb4764 12310extern __inline __mmask8
5c4ade6d 12311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12312_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
12313 const int __P, const int __R)
5c4ade6d 12314{
79fb4764
HJ
12315 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12316 (__v8df) __Y, __P,
12317 (__mmask8) __U, __R);
5c4ade6d
JJ
12318}
12319
79fb4764 12320extern __inline __mmask16
5c4ade6d 12321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12322_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
12323 const int __P, const int __R)
5c4ade6d 12324{
79fb4764
HJ
12325 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12326 (__v16sf) __Y, __P,
12327 (__mmask16) __U, __R);
5c4ade6d
JJ
12328}
12329
79fb4764
HJ
12330#else
/* Macro forms of the generic compare intrinsics, used when __OPTIMIZE__ is
   not defined.  Each expands to the same builtin call as the inline
   function of the same name.  The R argument is parenthesized like every
   other macro argument.  */
#define _mm512_cmp_epi64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y), (int)(P),\
                                           (__mmask8)-1))

#define _mm512_cmp_epi32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)-1))

#define _mm512_cmp_epu64_mask(X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),       \
                                            (__v8di)(__m512i)(Y), (int)(P),\
                                            (__mmask8)-1))

#define _mm512_cmp_epu32_mask(X, Y, P)                                  \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)-1))

#define _mm512_cmp_round_pd_mask(X, Y, P, R)                            \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)-1, (R)))

#define _mm512_cmp_round_ps_mask(X, Y, P, R)                            \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)-1, (R)))

#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),        \
                                           (__v8di)(__m512i)(Y), (int)(P),\
                                           (__mmask8)(M)))

#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
                                            (__v16si)(__m512i)(Y), (int)(P), \
                                            (__mmask16)(M)))

#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                          \
  ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),       \
                                            (__v8di)(__m512i)(Y), (int)(P),\
                                            (__mmask8)(M)))

#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                          \
  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
                                             (__v16si)(__m512i)(Y), (int)(P), \
                                             (__mmask16)(M)))

#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                    \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),       \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)(M), (R)))

#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                    \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),      \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)(M), (R)))
12390
12391#endif
12392
12393#ifdef __OPTIMIZE__
12394extern __inline __m512
5c4ade6d 12395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12396_mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
5c4ade6d 12397{
79fb4764
HJ
12398 __m512 __v1_old = _mm512_undefined_ps ();
12399 __mmask16 __mask = 0xFFFF;
12400
12401 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
12402 __addr,
12403 (__v16si) __index,
12404 __mask, __scale);
5c4ade6d
JJ
12405}
12406
79fb4764 12407extern __inline __m512
5c4ade6d 12408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12409_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
12410 __m512i __index, void const *__addr, int __scale)
5c4ade6d 12411{
79fb4764
HJ
12412 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
12413 __addr,
12414 (__v16si) __index,
12415 __mask, __scale);
5c4ade6d
JJ
12416}
12417
79fb4764 12418extern __inline __m512d
5c4ade6d 12419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12420_mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
5c4ade6d 12421{
79fb4764
HJ
12422 __m512d __v1_old = _mm512_undefined_pd ();
12423 __mmask8 __mask = 0xFF;
12424
12425 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
12426 __addr,
12427 (__v8si) __index, __mask,
12428 __scale);
5c4ade6d
JJ
12429}
12430
79fb4764 12431extern __inline __m512d
5c4ade6d 12432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12433_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
12434 __m256i __index, void const *__addr, int __scale)
5c4ade6d 12435{
79fb4764
HJ
12436 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
12437 __addr,
12438 (__v8si) __index,
12439 __mask, __scale);
5c4ade6d
JJ
12440}
12441
79fb4764 12442extern __inline __m256
5c4ade6d 12443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12444_mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
5c4ade6d 12445{
79fb4764
HJ
12446 __m256 __v1_old = _mm256_undefined_ps ();
12447 __mmask8 __mask = 0xFF;
12448
12449 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
12450 __addr,
12451 (__v8di) __index, __mask,
12452 __scale);
5c4ade6d
JJ
12453}
12454
79fb4764 12455extern __inline __m256
5c4ade6d 12456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12457_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
12458 __m512i __index, void const *__addr, int __scale)
5c4ade6d 12459{
79fb4764
HJ
12460 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
12461 __addr,
12462 (__v8di) __index,
12463 __mask, __scale);
5c4ade6d
JJ
12464}
12465
79fb4764 12466extern __inline __m512d
5c4ade6d 12467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12468_mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
5c4ade6d 12469{
79fb4764
HJ
12470 __m512d __v1_old = _mm512_undefined_pd ();
12471 __mmask8 __mask = 0xFF;
12472
12473 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
12474 __addr,
12475 (__v8di) __index, __mask,
12476 __scale);
5c4ade6d
JJ
12477}
12478
79fb4764 12479extern __inline __m512d
5c4ade6d 12480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12481_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
12482 __m512i __index, void const *__addr, int __scale)
5c4ade6d 12483{
79fb4764
HJ
12484 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
12485 __addr,
12486 (__v8di) __index,
12487 __mask, __scale);
5c4ade6d
JJ
12488}
12489
79fb4764 12490extern __inline __m512i
5c4ade6d 12491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12492_mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
5c4ade6d 12493{
79fb4764
HJ
12494 __m512i __v1_old = _mm512_undefined_epi32 ();
12495 __mmask16 __mask = 0xFFFF;
12496
12497 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
12498 __addr,
12499 (__v16si) __index,
12500 __mask, __scale);
5c4ade6d
JJ
12501}
12502
79fb4764 12503extern __inline __m512i
5c4ade6d 12504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12505_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
12506 __m512i __index, void const *__addr, int __scale)
5c4ade6d 12507{
79fb4764
HJ
12508 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
12509 __addr,
12510 (__v16si) __index,
12511 __mask, __scale);
5c4ade6d
JJ
12512}
12513
79fb4764 12514extern __inline __m512i
5c4ade6d 12515__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12516_mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
5c4ade6d 12517{
79fb4764
HJ
12518 __m512i __v1_old = _mm512_undefined_epi32 ();
12519 __mmask8 __mask = 0xFF;
5c4ade6d 12520
79fb4764
HJ
12521 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
12522 __addr,
12523 (__v8si) __index, __mask,
12524 __scale);
5c4ade6d
JJ
12525}
12526
79fb4764 12527extern __inline __m512i
5c4ade6d 12528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12529_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
12530 __m256i __index, void const *__addr,
12531 int __scale)
5c4ade6d 12532{
79fb4764
HJ
12533 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
12534 __addr,
12535 (__v8si) __index,
12536 __mask, __scale);
5c4ade6d
JJ
12537}
12538
79fb4764 12539extern __inline __m256i
5c4ade6d 12540__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12541_mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
5c4ade6d 12542{
79fb4764
HJ
12543 __m256i __v1_old = _mm256_undefined_si256 ();
12544 __mmask8 __mask = 0xFF;
12545
12546 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
12547 __addr,
12548 (__v8di) __index,
12549 __mask, __scale);
5c4ade6d
JJ
12550}
12551
79fb4764 12552extern __inline __m256i
5c4ade6d 12553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12554_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
12555 __m512i __index, void const *__addr, int __scale)
5c4ade6d 12556{
79fb4764
HJ
12557 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
12558 __addr,
12559 (__v8di) __index,
12560 __mask, __scale);
5c4ade6d
JJ
12561}
12562
79fb4764 12563extern __inline __m512i
5c4ade6d 12564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12565_mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
5c4ade6d 12566{
79fb4764
HJ
12567 __m512i __v1_old = _mm512_undefined_epi32 ();
12568 __mmask8 __mask = 0xFF;
12569
12570 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
12571 __addr,
12572 (__v8di) __index, __mask,
12573 __scale);
5c4ade6d
JJ
12574}
12575
79fb4764 12576extern __inline __m512i
5c4ade6d 12577__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12578_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
12579 __m512i __index, void const *__addr,
12580 int __scale)
5c4ade6d 12581{
79fb4764
HJ
12582 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
12583 __addr,
12584 (__v8di) __index,
12585 __mask, __scale);
5c4ade6d
JJ
12586}
12587
79fb4764 12588extern __inline void
5c4ade6d 12589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12590_mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
5c4ade6d 12591{
79fb4764
HJ
12592 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
12593 (__v16si) __index, (__v16sf) __v1, __scale);
5c4ade6d
JJ
12594}
12595
79fb4764 12596extern __inline void
5c4ade6d 12597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12598_mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
12599 __m512i __index, __m512 __v1, int __scale)
5c4ade6d 12600{
79fb4764
HJ
12601 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
12602 (__v16sf) __v1, __scale);
5c4ade6d
JJ
12603}
12604
79fb4764 12605extern __inline void
5c4ade6d 12606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12607_mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
12608 int __scale)
5c4ade6d 12609{
79fb4764
HJ
12610 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
12611 (__v8si) __index, (__v8df) __v1, __scale);
5c4ade6d
JJ
12612}
12613
79fb4764 12614extern __inline void
5c4ade6d 12615__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12616_mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
12617 __m256i __index, __m512d __v1, int __scale)
5c4ade6d 12618{
79fb4764
HJ
12619 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
12620 (__v8df) __v1, __scale);
5c4ade6d
JJ
12621}
12622
79fb4764 12623extern __inline void
5c4ade6d 12624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12625_mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
5c4ade6d 12626{
79fb4764
HJ
12627 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
12628 (__v8di) __index, (__v8sf) __v1, __scale);
5c4ade6d
JJ
12629}
12630
79fb4764 12631extern __inline void
5c4ade6d 12632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12633_mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
12634 __m512i __index, __m256 __v1, int __scale)
5c4ade6d 12635{
79fb4764
HJ
12636 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
12637 (__v8sf) __v1, __scale);
5c4ade6d
JJ
12638}
12639
79fb4764 12640extern __inline void
5c4ade6d 12641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12642_mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
12643 int __scale)
5c4ade6d 12644{
79fb4764
HJ
12645 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
12646 (__v8di) __index, (__v8df) __v1, __scale);
5c4ade6d
JJ
12647}
12648
79fb4764 12649extern __inline void
5c4ade6d 12650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12651_mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
12652 __m512i __index, __m512d __v1, int __scale)
5c4ade6d 12653{
79fb4764
HJ
12654 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
12655 (__v8df) __v1, __scale);
5c4ade6d
JJ
12656}
12657
79fb4764 12658extern __inline void
5c4ade6d 12659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12660_mm512_i32scatter_epi32 (void *__addr, __m512i __index,
12661 __m512i __v1, int __scale)
5c4ade6d 12662{
79fb4764
HJ
12663 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
12664 (__v16si) __index, (__v16si) __v1, __scale);
5c4ade6d
JJ
12665}
12666
79fb4764 12667extern __inline void
5c4ade6d 12668__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12669_mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
12670 __m512i __index, __m512i __v1, int __scale)
5c4ade6d 12671{
79fb4764
HJ
12672 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
12673 (__v16si) __v1, __scale);
5c4ade6d
JJ
12674}
12675
79fb4764 12676extern __inline void
5c4ade6d 12677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12678_mm512_i32scatter_epi64 (void *__addr, __m256i __index,
12679 __m512i __v1, int __scale)
5c4ade6d 12680{
79fb4764
HJ
12681 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
12682 (__v8si) __index, (__v8di) __v1, __scale);
5c4ade6d
JJ
12683}
12684
79fb4764 12685extern __inline void
5c4ade6d 12686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12687_mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
12688 __m256i __index, __m512i __v1, int __scale)
12689{
12690 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
12691 (__v8di) __v1, __scale);
5c4ade6d
JJ
12692}
12693
79fb4764 12694extern __inline void
5c4ade6d 12695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12696_mm512_i64scatter_epi32 (void *__addr, __m512i __index,
12697 __m256i __v1, int __scale)
5c4ade6d 12698{
79fb4764
HJ
12699 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
12700 (__v8di) __index, (__v8si) __v1, __scale);
5c4ade6d
JJ
12701}
12702
79fb4764 12703extern __inline void
5c4ade6d 12704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12705_mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
12706 __m512i __index, __m256i __v1, int __scale)
5c4ade6d 12707{
79fb4764
HJ
12708 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
12709 (__v8si) __v1, __scale);
5c4ade6d
JJ
12710}
12711
79fb4764 12712extern __inline void
5c4ade6d 12713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12714_mm512_i64scatter_epi64 (void *__addr, __m512i __index,
12715 __m512i __v1, int __scale)
5c4ade6d 12716{
79fb4764
HJ
12717 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
12718 (__v8di) __index, (__v8di) __v1, __scale);
5c4ade6d
JJ
12719}
12720
79fb4764 12721extern __inline void
5c4ade6d 12722__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
12723_mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
12724 __m512i __index, __m512i __v1, int __scale)
5c4ade6d 12725{
79fb4764
HJ
12726 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
12727 (__v8di) __v1, __scale);
5c4ade6d
JJ
12728}
12729#else
79fb4764
HJ
/* Macro form of _mm512_i32gather_ps: undefined source vector, mask 0xFFFF.
   The whole expansion is parenthesized so the leading cast cannot capture
   a postfix operator written after the macro call.  */
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
                                          (void const *) (ADDR),        \
                                          (__v16si)(__m512i) (INDEX),   \
                                          (__mmask16)0xFFFF,            \
                                          (int) (SCALE)))
5c4ade6d 12736
79fb4764
HJ
/* Macro form of _mm512_mask_i32gather_ps.  The whole expansion is
   parenthesized so the leading cast cannot capture a postfix operator
   written after the macro call.  */
#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v16si)(__m512i) (INDEX),   \
                                          (__mmask16) (MASK),           \
                                          (int) (SCALE)))
5c4ade6d 12743
79fb4764
HJ
/* Macro form of _mm512_i32gather_pd: undefined source vector, mask 0xFF.
   The whole expansion is parenthesized so the leading cast cannot capture
   a postfix operator written after the macro call.  */
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
5c4ade6d 12749
79fb4764
HJ
/* Macro form of _mm512_mask_i32gather_pd.  The whole expansion is
   parenthesized so the leading cast cannot capture a postfix operator
   written after the macro call.  */
#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
5c4ade6d 12756
79fb4764
HJ
/* Macro form of _mm512_i64gather_ps: undefined 256-bit source, mask 0xFF.
   The whole expansion is parenthesized so the leading cast cannot capture
   a postfix operator written after the macro call.  */
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                         \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),\
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
5c4ade6d 12762
79fb4764
HJ
/* Macro form of _mm512_mask_i64gather_ps.  The whole expansion is
   parenthesized so the leading cast cannot capture a postfix operator
   written after the macro call.  */
#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD),     \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
5c4ade6d 12769
79fb4764
HJ
/* Macro form of _mm512_i64gather_pd: undefined source vector, mask 0xFF.
   The whole expansion is parenthesized so the leading cast cannot capture
   a postfix operator written after the macro call.  */
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                         \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),\
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
5c4ade6d 12775
79fb4764
HJ
/* Macro form of _mm512_mask_i64gather_pd.  The whole expansion is
   parenthesized so the leading cast cannot capture a postfix operator
   written after the macro call.  */
#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)       \
  ((__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
5c4ade6d 12782
79fb4764
HJ
/* Macro form of _mm512_i32gather_epi32: undefined source, mask 0xFFFF.
   The whole expansion is parenthesized so the leading cast cannot capture
   a postfix operator written after the macro call.  */
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)                    \
                                           _mm512_undefined_epi32 (),   \
                                           (void const *) (ADDR),       \
                                           (__v16si)(__m512i) (INDEX),  \
                                           (__mmask16)0xFFFF,           \
                                           (int) (SCALE)))
5c4ade6d 12789
79fb4764
HJ
/* Macro form of _mm512_mask_i32gather_epi32.  The whole expansion is
   parenthesized so the leading cast cannot capture a postfix operator
   written after the macro call.  */
#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD),  \
                                           (void const *) (ADDR),       \
                                           (__v16si)(__m512i) (INDEX),  \
                                           (__mmask16) (MASK),          \
                                           (int) (SCALE)))
5c4ade6d 12796
79fb4764
HJ
/* Macro form of _mm512_i32gather_epi64: undefined source, mask 0xFF.
   The whole expansion is parenthesized so the leading cast cannot capture
   a postfix operator written after the macro call.  */
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)                      \
                                          _mm512_undefined_epi32 (),    \
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
5c4ade6d 12802
79fb4764
HJ
/* Macro form of _mm512_mask_i32gather_epi64.  The whole expansion is
   parenthesized so the leading cast cannot capture a postfix operator
   written after the macro call.  */
#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8si)(__m256i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
5c4ade6d 12809
79fb4764
HJ
/* Macro form of _mm512_i64gather_epi32: undefined 256-bit source, mask
   0xFF.  The whole expansion is parenthesized so the leading cast cannot
   capture a postfix operator written after the macro call.  */
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                      \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)                     \
                                           _mm256_undefined_si256(),    \
                                           (void const *) (ADDR),       \
                                           (__v8di)(__m512i) (INDEX),   \
                                           (__mmask8)0xFF, (int) (SCALE)))
5c4ade6d 12815
79fb4764
HJ
/* Macro form of _mm512_mask_i64gather_epi32.  The whole expansion is
   parenthesized so the leading cast cannot capture a postfix operator
   written after the macro call.  */
#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD),   \
                                           (void const *) (ADDR),       \
                                           (__v8di)(__m512i) (INDEX),   \
                                           (__mmask8) (MASK),           \
                                           (int) (SCALE)))
5c4ade6d 12822
79fb4764
HJ
/* Macro form of _mm512_i64gather_epi64: undefined source, mask 0xFF.
   The whole expansion is parenthesized so the leading cast cannot capture
   a postfix operator written after the macro call.  */
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                      \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)                      \
                                          _mm512_undefined_epi32 (),    \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8)0xFF, (int) (SCALE)))
5c4ade6d 12828
79fb4764
HJ
/* Macro form of _mm512_mask_i64gather_epi64.  The whole expansion is
   parenthesized so the leading cast cannot capture a postfix operator
   written after the macro call.  */
#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)    \
  ((__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD),    \
                                          (void const *) (ADDR),        \
                                          (__v8di)(__m512i) (INDEX),    \
                                          (__mmask8) (MASK),            \
                                          (int) (SCALE)))
5c4ade6d 12835
79fb4764
HJ
/* Macro form of _mm512_i32scatter_ps: all sixteen mask bits set.  */
#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR),                       \
                                 (__mmask16)0xFFFF,                     \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1),                \
                                 (int) (SCALE))
5c4ade6d 12840
79fb4764
HJ
/* Macro form of _mm512_mask_i32scatter_ps: MASK is cast to __mmask16.  */
#define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv16sf ((void *) (ADDR),                       \
                                 (__mmask16) (MASK),                    \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16sf)(__m512) (V1),                \
                                 (int) (SCALE))
5c4ade6d 12845
79fb4764
HJ
/* Macro form of _mm512_i32scatter_pd: all eight mask bits set.  */
#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scattersiv8df ((void *) (ADDR),                        \
                                (__mmask8)0xFF,                         \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1),                 \
                                (int) (SCALE))
5c4ade6d 12850
79fb4764
HJ
/* Macro form of _mm512_mask_i32scatter_pd: MASK is cast to __mmask8.  */
#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scattersiv8df ((void *) (ADDR),                        \
                                (__mmask8) (MASK),                      \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8df)(__m512d) (V1),                 \
                                (int) (SCALE))
12855
/* Macro form of _mm512_i64scatter_ps: all eight mask bits set.  */
#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR),                       \
                                 (__mmask8)0xFF,                        \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1),                 \
                                 (int) (SCALE))
12860
/* Macro form of _mm512_mask_i64scatter_ps.  MASK is cast to __mmask8, the
   mask type used by the inline function of the same name and by the
   unmasked _mm512_i64scatter_ps macro (which passes (__mmask8)0xFF).  */
#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8) (MASK),    \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8sf)(__m256) (V1), (int) (SCALE))
12865
/* Macro form of _mm512_i64scatter_pd: all eight mask bits set.  */
#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                    \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR),                        \
                                (__mmask8)0xFF,                         \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1),                 \
                                (int) (SCALE))
12870
/* Macro form of _mm512_mask_i64scatter_pd: MASK is cast to __mmask8.  */
#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)         \
  __builtin_ia32_scatterdiv8df ((void *) (ADDR),                        \
                                (__mmask8) (MASK),                      \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8df)(__m512d) (V1),                 \
                                (int) (SCALE))
12875
/* Macro form of _mm512_i32scatter_epi32: all sixteen mask bits set.  */
#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv16si ((void *) (ADDR),                       \
                                 (__mmask16)0xFFFF,                     \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1),               \
                                 (int) (SCALE))
12880
/* Macro form of _mm512_mask_i32scatter_epi32: MASK is cast to __mmask16.  */
#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv16si ((void *) (ADDR),                       \
                                 (__mmask16) (MASK),                    \
                                 (__v16si)(__m512i) (INDEX),            \
                                 (__v16si)(__m512i) (V1),               \
                                 (int) (SCALE))
12885
/* Macro form of _mm512_i32scatter_epi64: all eight mask bits set.  */
#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scattersiv8di ((void *) (ADDR),                        \
                                (__mmask8)0xFF,                         \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1),                 \
                                (int) (SCALE))
12890
/* Macro form of _mm512_mask_i32scatter_epi64: MASK is cast to __mmask8.  */
#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scattersiv8di ((void *) (ADDR),                        \
                                (__mmask8) (MASK),                      \
                                (__v8si)(__m256i) (INDEX),              \
                                (__v8di)(__m512i) (V1),                 \
                                (int) (SCALE))
12895
/* Macro form of _mm512_i64scatter_epi32: all eight mask bits set.  */
#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR),                       \
                                 (__mmask8)0xFF,                        \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1),                \
                                 (int) (SCALE))
12900
/* Macro form of _mm512_mask_i64scatter_epi32: MASK is cast to __mmask8.  */
#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv16si ((void *) (ADDR),                       \
                                 (__mmask8) (MASK),                     \
                                 (__v8di)(__m512i) (INDEX),             \
                                 (__v8si)(__m256i) (V1),                \
                                 (int) (SCALE))
12905
/* Macro form of _mm512_i64scatter_epi64: all eight mask bits set.  */
#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                 \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR),                        \
                                (__mmask8)0xFF,                         \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1),                 \
                                (int) (SCALE))
12910
/* Macro form of _mm512_mask_i64scatter_epi64: MASK is cast to __mmask8.  */
#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)      \
  __builtin_ia32_scatterdiv8di ((void *) (ADDR),                        \
                                (__mmask8) (MASK),                      \
                                (__v8di)(__m512i) (INDEX),              \
                                (__v8di)(__m512i) (V1),                 \
                                (int) (SCALE))
12915#endif
5c4ade6d 12916
79fb4764
HJ
12917extern __inline __m512d
12918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12919_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
12920{
12921 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
12922 (__v8df) __W,
12923 (__mmask8) __U);
12924}
5c4ade6d 12925
79fb4764
HJ
12926extern __inline __m512d
12927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12928_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
12929{
12930 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
12931 (__v8df)
12932 _mm512_setzero_pd (),
12933 (__mmask8) __U);
12934}
5c4ade6d 12935
79fb4764
HJ
12936extern __inline void
12937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12938_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
12939{
12940 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
12941 (__mmask8) __U);
12942}
5c4ade6d 12943
79fb4764
HJ
12944extern __inline __m512
12945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12946_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
12947{
12948 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
12949 (__v16sf) __W,
12950 (__mmask16) __U);
12951}
5c4ade6d 12952
79fb4764 12953extern __inline __m512
756c5857 12954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12955_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
756c5857 12956{
79fb4764
HJ
12957 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
12958 (__v16sf)
12959 _mm512_setzero_ps (),
12960 (__mmask16) __U);
756c5857
AI
12961}
12962
79fb4764 12963extern __inline void
756c5857 12964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12965_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
756c5857 12966{
79fb4764
HJ
12967 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
12968 (__mmask16) __U);
756c5857 12969}
756c5857 12970
79fb4764 12971extern __inline __m512i
756c5857 12972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12973_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
756c5857 12974{
79fb4764
HJ
12975 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
12976 (__v8di) __W,
12977 (__mmask8) __U);
756c5857
AI
12978}
12979
79fb4764 12980extern __inline __m512i
756c5857 12981__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12982_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
756c5857 12983{
79fb4764
HJ
12984 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
12985 (__v8di)
12986 _mm512_setzero_si512 (),
12987 (__mmask8) __U);
756c5857
AI
12988}
12989
79fb4764 12990extern __inline void
756c5857 12991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 12992_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
756c5857 12993{
79fb4764
HJ
12994 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
12995 (__mmask8) __U);
756c5857
AI
12996}
12997
79fb4764 12998extern __inline __m512i
756c5857 12999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13000_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
756c5857 13001{
79fb4764
HJ
13002 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
13003 (__v16si) __W,
13004 (__mmask16) __U);
756c5857
AI
13005}
13006
79fb4764 13007extern __inline __m512i
756c5857 13008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13009_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
756c5857 13010{
79fb4764
HJ
13011 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
13012 (__v16si)
13013 _mm512_setzero_si512 (),
13014 (__mmask16) __U);
756c5857
AI
13015}
13016
79fb4764 13017extern __inline void
756c5857 13018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13019_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
756c5857 13020{
79fb4764
HJ
13021 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
13022 (__mmask16) __U);
756c5857
AI
13023}
13024
13025extern __inline __m512d
13026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13027_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
756c5857 13028{
79fb4764
HJ
13029 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
13030 (__v8df) __W,
13031 (__mmask8) __U);
756c5857
AI
13032}
13033
13034extern __inline __m512d
13035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13036_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
756c5857 13037{
79fb4764
HJ
13038 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
13039 (__v8df)
13040 _mm512_setzero_pd (),
13041 (__mmask8) __U);
756c5857
AI
13042}
13043
13044extern __inline __m512d
13045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13046_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
756c5857 13047{
79fb4764
HJ
13048 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
13049 (__v8df) __W,
13050 (__mmask8) __U);
756c5857
AI
13051}
13052
79fb4764 13053extern __inline __m512d
756c5857 13054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13055_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
756c5857 13056{
79fb4764
HJ
13057 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
13058 (__v8df)
13059 _mm512_setzero_pd (),
13060 (__mmask8) __U);
756c5857
AI
13061}
13062
13063extern __inline __m512
13064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13065_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
756c5857 13066{
79fb4764
HJ
13067 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
13068 (__v16sf) __W,
13069 (__mmask16) __U);
756c5857
AI
13070}
13071
13072extern __inline __m512
13073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13074_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
756c5857 13075{
79fb4764
HJ
13076 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
13077 (__v16sf)
13078 _mm512_setzero_ps (),
13079 (__mmask16) __U);
756c5857
AI
13080}
13081
79fb4764 13082extern __inline __m512
1853f5c7 13083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13084_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
1853f5c7 13085{
79fb4764
HJ
13086 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
13087 (__v16sf) __W,
13088 (__mmask16) __U);
1853f5c7
SP
13089}
13090
79fb4764 13091extern __inline __m512
1853f5c7 13092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13093_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
1853f5c7 13094{
79fb4764
HJ
13095 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
13096 (__v16sf)
13097 _mm512_setzero_ps (),
13098 (__mmask16) __U);
1853f5c7
SP
13099}
13100
79fb4764 13101extern __inline __m512i
1853f5c7 13102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13103_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1853f5c7 13104{
79fb4764
HJ
13105 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
13106 (__v8di) __W,
13107 (__mmask8) __U);
1853f5c7
SP
13108}
13109
79fb4764 13110extern __inline __m512i
1853f5c7 13111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13112_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
1853f5c7 13113{
79fb4764
HJ
13114 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
13115 (__v8di)
13116 _mm512_setzero_si512 (),
13117 (__mmask8) __U);
1853f5c7
SP
13118}
13119
79fb4764 13120extern __inline __m512i
756c5857 13121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13122_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
756c5857 13123{
79fb4764
HJ
13124 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
13125 (__v8di) __W,
13126 (__mmask8) __U);
756c5857
AI
13127}
13128
79fb4764 13129extern __inline __m512i
756c5857 13130__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13131_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
756c5857 13132{
79fb4764
HJ
13133 return (__m512i)
13134 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
13135 (__v8di)
13136 _mm512_setzero_si512 (),
13137 (__mmask8) __U);
756c5857
AI
13138}
13139
79fb4764 13140extern __inline __m512i
756c5857 13141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13142_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
756c5857 13143{
79fb4764
HJ
13144 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
13145 (__v16si) __W,
13146 (__mmask16) __U);
756c5857
AI
13147}
13148
79fb4764 13149extern __inline __m512i
756c5857 13150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13151_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
756c5857 13152{
79fb4764
HJ
13153 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
13154 (__v16si)
13155 _mm512_setzero_si512 (),
13156 (__mmask16) __U);
756c5857
AI
13157}
13158
79fb4764 13159extern __inline __m512i
756c5857 13160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13161_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
756c5857 13162{
79fb4764
HJ
13163 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
13164 (__v16si) __W,
13165 (__mmask16) __U);
756c5857
AI
13166}
13167
79fb4764 13168extern __inline __m512i
756c5857 13169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13170_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
756c5857 13171{
79fb4764
HJ
13172 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
13173 (__v16si)
13174 _mm512_setzero_si512
13175 (), (__mmask16) __U);
756c5857
AI
13176}
13177
79fb4764 13178extern __inline __mmask16
1853f5c7 13179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13180_mm512_kand (__mmask16 __A, __mmask16 __B)
1853f5c7 13181{
79fb4764 13182 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
1853f5c7
SP
13183}
13184
79fb4764 13185extern __inline __mmask16
1853f5c7 13186__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13187_mm512_kandn (__mmask16 __A, __mmask16 __B)
1853f5c7 13188{
79fb4764
HJ
13189 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
13190 (__mmask16) __B);
1853f5c7
SP
13191}
13192
79fb4764 13193extern __inline __mmask16
1853f5c7 13194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13195_mm512_kor (__mmask16 __A, __mmask16 __B)
1853f5c7 13196{
79fb4764 13197 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
1853f5c7
SP
13198}
13199
79fb4764 13200extern __inline int
1853f5c7 13201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13202_mm512_kortestz (__mmask16 __A, __mmask16 __B)
1853f5c7 13203{
79fb4764
HJ
13204 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
13205 (__mmask16) __B);
1853f5c7
SP
13206}
13207
79fb4764 13208extern __inline int
756c5857 13209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13210_mm512_kortestc (__mmask16 __A, __mmask16 __B)
756c5857 13211{
79fb4764
HJ
13212 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
13213 (__mmask16) __B);
756c5857
AI
13214}
13215
79fb4764 13216extern __inline __mmask16
756c5857 13217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13218_mm512_kxnor (__mmask16 __A, __mmask16 __B)
756c5857 13219{
79fb4764 13220 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
756c5857
AI
13221}
13222
79fb4764 13223extern __inline __mmask16
756c5857 13224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13225_mm512_kxor (__mmask16 __A, __mmask16 __B)
756c5857 13226{
79fb4764 13227 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
756c5857
AI
13228}
13229
79fb4764 13230extern __inline __mmask16
756c5857 13231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13232_mm512_knot (__mmask16 __A)
756c5857 13233{
79fb4764 13234 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
756c5857
AI
13235}
13236
79fb4764 13237extern __inline __mmask16
756c5857 13238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13239_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
756c5857 13240{
79fb4764 13241 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
756c5857
AI
13242}
13243
79fb4764
HJ
13244#ifdef __OPTIMIZE__
13245extern __inline __m512i
756c5857 13246__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
13247_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
13248 const int __imm)
756c5857 13249{
79fb4764
HJ
13250 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
13251 (__v4si) __D,
13252 __imm,
13253 (__v16si)
13254 _mm512_setzero_si512 (),
13255 __B);
756c5857
AI
13256}
13257
79fb4764 13258extern __inline __m512
f4ee3a9e 13259__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
13260_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
13261 const int __imm)
f4ee3a9e 13262{
79fb4764
HJ
13263 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
13264 (__v4sf) __D,
13265 __imm,
13266 (__v16sf)
13267 _mm512_setzero_ps (), __B);
f4ee3a9e
UB
13268}
13269
79fb4764 13270extern __inline __m512i
f4ee3a9e 13271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
13272_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
13273 __m128i __D, const int __imm)
f4ee3a9e 13274{
79fb4764
HJ
13275 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
13276 (__v4si) __D,
13277 __imm,
13278 (__v16si) __A,
13279 __B);
f4ee3a9e
UB
13280}
13281
79fb4764 13282extern __inline __m512
f4ee3a9e 13283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
13284_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
13285 __m128 __D, const int __imm)
f4ee3a9e 13286{
79fb4764
HJ
13287 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
13288 (__v4sf) __D,
13289 __imm,
13290 (__v16sf) __A, __B);
f4ee3a9e 13291}
79fb4764
HJ
13292#else
/* Macro form used when __OPTIMIZE__ is not defined: A is the mask, X the
   512-bit vector, Y the 128-bit vector and C the immediate.  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
                                             (__v4sf)(__m128) (Y),      \
                                             (int) (C),                 \
                                             (__v16sf)_mm512_setzero_ps(), \
                                             (__mmask16)(A)))
f4ee3a9e 13297
79fb4764
HJ
/* Macro form used when __OPTIMIZE__ is not defined: A is the mask, X the
   512-bit vector, Y the 128-bit vector and C the immediate.  */
#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
                                              (__v4si)(__m128i) (Y),    \
                                              (int) (C),                \
                                              (__v16si)                 \
                                              _mm512_setzero_si512 (),  \
                                              (__mmask16)(A)))
13302
/* Macro form used when __OPTIMIZE__ is not defined: A is passed as the
   fourth builtin operand, B is the mask, X the 512-bit vector, Y the
   128-bit vector and C the immediate.  */
#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
                                             (__v4sf)(__m128) (Y),      \
                                             (int) (C),                 \
                                             (__v16sf)(__m512) (A),     \
                                             (__mmask16)(B)))
13307
/* Macro form used when __OPTIMIZE__ is not defined: A is passed as the
   fourth builtin operand, B is the mask, X the 512-bit vector, Y the
   128-bit vector and C the immediate.  */
#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
                                              (__v4si)(__m128i) (Y),    \
                                              (int) (C),                \
                                              (__v16si)(__m512i) (A),   \
                                              (__mmask16)(B)))
13312#endif
13313
13314extern __inline __m512i
f4ee3a9e 13315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13316_mm512_max_epi64 (__m512i __A, __m512i __B)
f4ee3a9e 13317{
79fb4764
HJ
13318 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
13319 (__v8di) __B,
13320 (__v8di)
13321 _mm512_undefined_epi32 (),
13322 (__mmask8) -1);
f4ee3a9e
UB
13323}
13324
79fb4764 13325extern __inline __m512i
756c5857 13326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13327_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
756c5857 13328{
79fb4764
HJ
13329 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
13330 (__v8di) __B,
13331 (__v8di)
13332 _mm512_setzero_si512 (),
13333 __M);
756c5857
AI
13334}
13335
79fb4764 13336extern __inline __m512i
756c5857 13337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13338_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
756c5857 13339{
79fb4764
HJ
13340 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
13341 (__v8di) __B,
13342 (__v8di) __W, __M);
756c5857
AI
13343}
13344
79fb4764 13345extern __inline __m512i
756c5857 13346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13347_mm512_min_epi64 (__m512i __A, __m512i __B)
756c5857 13348{
79fb4764
HJ
13349 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
13350 (__v8di) __B,
13351 (__v8di)
13352 _mm512_undefined_epi32 (),
13353 (__mmask8) -1);
756c5857
AI
13354}
13355
79fb4764 13356extern __inline __m512i
756c5857 13357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13358_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
756c5857 13359{
79fb4764
HJ
13360 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
13361 (__v8di) __B,
13362 (__v8di) __W, __M);
756c5857
AI
13363}
13364
79fb4764 13365extern __inline __m512i
756c5857 13366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13367_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
756c5857 13368{
79fb4764
HJ
13369 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
13370 (__v8di) __B,
13371 (__v8di)
13372 _mm512_setzero_si512 (),
13373 __M);
756c5857
AI
13374}
13375
/* Calls __builtin_ia32_pmaxuq512_mask on __A and __B (cast to __v8di) with
   _mm512_undefined_epi32 () as the third operand and an all-ones
   __mmask8 mask.  */
79fb4764 13376extern __inline __m512i
756c5857 13377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13378_mm512_max_epu64 (__m512i __A, __m512i __B)
756c5857 13379{
79fb4764
HJ
13380 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
13381 (__v8di) __B,
13382 (__v8di)
13383 _mm512_undefined_epi32 (),
13384 (__mmask8) -1);
756c5857
AI
13385}
13386
/* Calls __builtin_ia32_pmaxuq512_mask on __A and __B (cast to __v8di) with
   _mm512_setzero_si512 () as the third operand and __M as the mask.
   Presumably lanes whose bit in __M is clear come out as zero -- confirm
   against the builtin.  */
79fb4764 13387extern __inline __m512i
f4ee3a9e 13388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13389_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
f4ee3a9e 13390{
79fb4764
HJ
13391 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
13392 (__v8di) __B,
13393 (__v8di)
13394 _mm512_setzero_si512 (),
13395 __M);
f4ee3a9e
UB
13396}
13397
/* Calls __builtin_ia32_pmaxuq512_mask on __A and __B (cast to __v8di) with
   __W as the third operand and __M as the mask.  Presumably lanes whose
   bit in __M is clear keep the value from __W -- confirm against the
   builtin.  */
79fb4764 13398extern __inline __m512i
f4ee3a9e 13399__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13400_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
f4ee3a9e 13401{
79fb4764
HJ
13402 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
13403 (__v8di) __B,
13404 (__v8di) __W, __M);
f4ee3a9e
UB
13405}
13406
/* Calls __builtin_ia32_pminuq512_mask on __A and __B (cast to __v8di) with
   _mm512_undefined_epi32 () as the third operand and an all-ones
   __mmask8 mask.  */
79fb4764 13407extern __inline __m512i
f4ee3a9e 13408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13409_mm512_min_epu64 (__m512i __A, __m512i __B)
f4ee3a9e 13410{
79fb4764
HJ
13411 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
13412 (__v8di) __B,
13413 (__v8di)
13414 _mm512_undefined_epi32 (),
13415 (__mmask8) -1);
f4ee3a9e
UB
13416}
13417
/* Calls __builtin_ia32_pminuq512_mask on __A and __B (cast to __v8di) with
   __W as the third operand and __M as the mask.  Presumably lanes whose
   bit in __M is clear keep the value from __W -- confirm against the
   builtin.  */
79fb4764 13418extern __inline __m512i
f4ee3a9e 13419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13420_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
f4ee3a9e 13421{
79fb4764
HJ
13422 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
13423 (__v8di) __B,
13424 (__v8di) __W, __M);
f4ee3a9e
UB
13425}
13426
/* Calls __builtin_ia32_pminuq512_mask on __A and __B (cast to __v8di) with
   _mm512_setzero_si512 () as the third operand and __M as the mask.
   Presumably lanes whose bit in __M is clear come out as zero -- confirm
   against the builtin.  */
79fb4764 13427extern __inline __m512i
756c5857 13428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13429_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
756c5857 13430{
79fb4764
HJ
13431 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
13432 (__v8di) __B,
13433 (__v8di)
13434 _mm512_setzero_si512 (),
13435 __M);
756c5857
AI
13436}
13437
/* Calls __builtin_ia32_pmaxsd512_mask on __A and __B (cast to __v16si)
   with _mm512_undefined_epi32 () as the third operand and an all-ones
   __mmask16 mask.  */
79fb4764 13438extern __inline __m512i
756c5857 13439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13440_mm512_max_epi32 (__m512i __A, __m512i __B)
756c5857 13441{
79fb4764
HJ
13442 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
13443 (__v16si) __B,
13444 (__v16si)
13445 _mm512_undefined_epi32 (),
13446 (__mmask16) -1);
756c5857
AI
13447}
13448
/* Calls __builtin_ia32_pmaxsd512_mask on __A and __B (cast to __v16si)
   with _mm512_setzero_si512 () as the third operand and __M as the mask.
   Presumably lanes whose bit in __M is clear come out as zero -- confirm
   against the builtin.  */
79fb4764 13449extern __inline __m512i
756c5857 13450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13451_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
756c5857 13452{
79fb4764
HJ
13453 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
13454 (__v16si) __B,
13455 (__v16si)
13456 _mm512_setzero_si512 (),
13457 __M);
756c5857
AI
13458}
13459
/* Calls __builtin_ia32_pmaxsd512_mask on __A and __B (cast to __v16si)
   with __W as the third operand and __M as the mask.  Presumably lanes
   whose bit in __M is clear keep the value from __W -- confirm against
   the builtin.  */
79fb4764 13460extern __inline __m512i
756c5857 13461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13462_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
756c5857 13463{
79fb4764
HJ
13464 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
13465 (__v16si) __B,
13466 (__v16si) __W, __M);
756c5857
AI
13467}
13468
/* Calls __builtin_ia32_pminsd512_mask on __A and __B (cast to __v16si)
   with _mm512_undefined_epi32 () as the third operand and an all-ones
   __mmask16 mask.  */
79fb4764 13469extern __inline __m512i
756c5857 13470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13471_mm512_min_epi32 (__m512i __A, __m512i __B)
756c5857 13472{
79fb4764
HJ
13473 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
13474 (__v16si) __B,
13475 (__v16si)
13476 _mm512_undefined_epi32 (),
13477 (__mmask16) -1);
756c5857
AI
13478}
13479
/* Calls __builtin_ia32_pminsd512_mask on __A and __B (cast to __v16si)
   with _mm512_setzero_si512 () as the third operand and __M as the mask.
   Presumably lanes whose bit in __M is clear come out as zero -- confirm
   against the builtin.  */
79fb4764 13480extern __inline __m512i
756c5857 13481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13482_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
756c5857 13483{
79fb4764
HJ
13484 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
13485 (__v16si) __B,
13486 (__v16si)
13487 _mm512_setzero_si512 (),
13488 __M);
756c5857
AI
13489}
13490
/* Calls __builtin_ia32_pminsd512_mask on __A and __B (cast to __v16si)
   with __W as the third operand and __M as the mask.  Presumably lanes
   whose bit in __M is clear keep the value from __W -- confirm against
   the builtin.  */
79fb4764 13491extern __inline __m512i
dc7401c0 13492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13493_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
dc7401c0 13494{
79fb4764
HJ
13495 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
13496 (__v16si) __B,
13497 (__v16si) __W, __M);
dc7401c0
SP
13498}
13499
/* Calls __builtin_ia32_pmaxud512_mask on __A and __B (cast to __v16si)
   with _mm512_undefined_epi32 () as the third operand and an all-ones
   __mmask16 mask.  */
79fb4764 13500extern __inline __m512i
dc7401c0 13501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13502_mm512_max_epu32 (__m512i __A, __m512i __B)
dc7401c0 13503{
79fb4764
HJ
13504 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
13505 (__v16si) __B,
13506 (__v16si)
13507 _mm512_undefined_epi32 (),
13508 (__mmask16) -1);
dc7401c0
SP
13509}
13510
/* Calls __builtin_ia32_pmaxud512_mask on __A and __B (cast to __v16si)
   with _mm512_setzero_si512 () as the third operand and __M as the mask.
   Presumably lanes whose bit in __M is clear come out as zero -- confirm
   against the builtin.  */
79fb4764 13511extern __inline __m512i
dc7401c0 13512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13513_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
dc7401c0 13514{
79fb4764
HJ
13515 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
13516 (__v16si) __B,
13517 (__v16si)
13518 _mm512_setzero_si512 (),
13519 __M);
dc7401c0
SP
13520}
13521
/* Calls __builtin_ia32_pmaxud512_mask on __A and __B (cast to __v16si)
   with __W as the third operand and __M as the mask.  Presumably lanes
   whose bit in __M is clear keep the value from __W -- confirm against
   the builtin.  */
79fb4764 13522extern __inline __m512i
dc7401c0 13523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13524_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
dc7401c0 13525{
79fb4764
HJ
13526 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
13527 (__v16si) __B,
13528 (__v16si) __W, __M);
dc7401c0
SP
13529}
13530
/* Calls __builtin_ia32_pminud512_mask on __A and __B (cast to __v16si)
   with _mm512_undefined_epi32 () as the third operand and an all-ones
   __mmask16 mask.  */
79fb4764 13531extern __inline __m512i
756c5857 13532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13533_mm512_min_epu32 (__m512i __A, __m512i __B)
756c5857 13534{
79fb4764
HJ
13535 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
13536 (__v16si) __B,
13537 (__v16si)
13538 _mm512_undefined_epi32 (),
13539 (__mmask16) -1);
756c5857
AI
13540}
13541
/* Calls __builtin_ia32_pminud512_mask on __A and __B (cast to __v16si)
   with _mm512_setzero_si512 () as the third operand and __M as the mask.
   Presumably lanes whose bit in __M is clear come out as zero -- confirm
   against the builtin.  */
79fb4764 13542extern __inline __m512i
756c5857 13543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13544_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
756c5857 13545{
79fb4764
HJ
13546 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
13547 (__v16si) __B,
13548 (__v16si)
13549 _mm512_setzero_si512 (),
13550 __M);
756c5857
AI
13551}
13552
/* Calls __builtin_ia32_pminud512_mask on __A and __B (cast to __v16si)
   with __W as the third operand and __M as the mask.  Presumably lanes
   whose bit in __M is clear keep the value from __W -- confirm against
   the builtin.  */
79fb4764 13553extern __inline __m512i
756c5857 13554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13555_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
756c5857 13556{
79fb4764
HJ
13557 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
13558 (__v16si) __B,
13559 (__v16si) __W, __M);
756c5857
AI
13560}
13561
/* Calls __builtin_ia32_unpcklps512_mask on __A and __B (cast to __v16sf)
   with _mm512_undefined_ps () as the third operand and an all-ones
   __mmask16 mask.  The lane interleaving itself is defined by the
   builtin, not by this wrapper.  */
13562extern __inline __m512
13563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13564_mm512_unpacklo_ps (__m512 __A, __m512 __B)
756c5857 13565{
79fb4764
HJ
13566 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
13567 (__v16sf) __B,
13568 (__v16sf)
13569 _mm512_undefined_ps (),
13570 (__mmask16) -1);
756c5857
AI
13571}
13572
/* Calls __builtin_ia32_unpcklps512_mask on __A and __B (cast to __v16sf)
   with __W as the third operand and __U as the mask.  Presumably lanes
   whose bit in __U is clear keep the value from __W -- confirm against
   the builtin.  */
13573extern __inline __m512
13574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13575_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
756c5857 13576{
79fb4764
HJ
13577 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
13578 (__v16sf) __B,
13579 (__v16sf) __W,
13580 (__mmask16) __U);
756c5857
AI
13581}
13582
/* Calls __builtin_ia32_unpcklps512_mask on __A and __B (cast to __v16sf)
   with _mm512_setzero_ps () as the third operand and __U as the mask.
   Presumably lanes whose bit in __U is clear come out as zero -- confirm
   against the builtin.  */
13583extern __inline __m512
13584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13585_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
756c5857 13586{
79fb4764
HJ
13587 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
13588 (__v16sf) __B,
13589 (__v16sf)
13590 _mm512_setzero_ps (),
13591 (__mmask16) __U);
756c5857
AI
13592}
13593
/* Passes __A, __W (both cast to __v8df) and the mask __U, in that order,
   to __builtin_ia32_blendmpd_512_mask.  Presumably each lane is taken
   from __A or __W according to its bit in __U -- confirm which source a
   set bit selects.  */
79fb4764 13594extern __inline __m512d
dc7401c0 13595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13596_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
dc7401c0 13597{
79fb4764
HJ
13598 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
13599 (__v8df) __W,
13600 (__mmask8) __U);
dc7401c0
SP
13601}
13602
/* Passes __A, __W (both cast to __v16sf) and the mask __U, in that order,
   to __builtin_ia32_blendmps_512_mask.  Presumably each lane is taken
   from __A or __W according to its bit in __U -- confirm which source a
   set bit selects.  */
79fb4764 13603extern __inline __m512
dc7401c0 13604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13605_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
dc7401c0 13606{
79fb4764
HJ
13607 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
13608 (__v16sf) __W,
13609 (__mmask16) __U);
dc7401c0
SP
13610}
13611
/* Passes __A, __W (both cast to __v8di) and the mask __U, in that order,
   to __builtin_ia32_blendmq_512_mask.  Presumably each lane is taken
   from __A or __W according to its bit in __U -- confirm which source a
   set bit selects.  */
79fb4764 13612extern __inline __m512i
dc7401c0 13613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13614_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
dc7401c0 13615{
79fb4764
HJ
13616 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
13617 (__v8di) __W,
13618 (__mmask8) __U);
dc7401c0
SP
13619}
13620
/* Passes __A, __W (both cast to __v16si) and the mask __U, in that order,
   to __builtin_ia32_blendmd_512_mask.  Presumably each lane is taken
   from __A or __W according to its bit in __U -- confirm which source a
   set bit selects.  */
79fb4764 13621extern __inline __m512i
dc7401c0 13622__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13623_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
dc7401c0 13624{
79fb4764
HJ
13625 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
13626 (__v16si) __W,
13627 (__mmask16) __U);
dc7401c0
SP
13628}
13629
756c5857
AI
/* Calls __builtin_ia32_sqrtpd512_mask on __A (cast to __v8df) with
   _mm512_undefined_pd () as the second operand, an all-ones __mmask8
   mask, and _MM_FROUND_CUR_DIRECTION as the final argument.  */
13630extern __inline __m512d
13631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13632_mm512_sqrt_pd (__m512d __A)
756c5857 13633{
79fb4764
HJ
13634 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
13635 (__v8df)
13636 _mm512_undefined_pd (),
13637 (__mmask8) -1,
13638 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13639}
13640
/* Calls __builtin_ia32_sqrtpd512_mask on __A (cast to __v8df) with __W as
   the second operand, __U as the mask, and _MM_FROUND_CUR_DIRECTION.
   Presumably lanes whose bit in __U is clear keep the value from __W --
   confirm against the builtin.  */
13641extern __inline __m512d
13642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13643_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
756c5857 13644{
79fb4764
HJ
13645 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
13646 (__v8df) __W,
13647 (__mmask8) __U,
13648 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13649}
13650
/* Calls __builtin_ia32_sqrtpd512_mask on __A (cast to __v8df) with
   _mm512_setzero_pd () as the second operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
13651extern __inline __m512d
13652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13653_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
756c5857 13654{
79fb4764
HJ
13655 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
13656 (__v8df)
13657 _mm512_setzero_pd (),
13658 (__mmask8) __U,
13659 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13660}
13661
/* Calls __builtin_ia32_sqrtps512_mask on __A (cast to __v16sf) with
   _mm512_undefined_ps () as the second operand, an all-ones __mmask16
   mask, and _MM_FROUND_CUR_DIRECTION as the final argument.  */
13662extern __inline __m512
13663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13664_mm512_sqrt_ps (__m512 __A)
756c5857 13665{
79fb4764
HJ
13666 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
13667 (__v16sf)
13668 _mm512_undefined_ps (),
13669 (__mmask16) -1,
13670 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13671}
13672
/* Calls __builtin_ia32_sqrtps512_mask on __A (cast to __v16sf) with __W as
   the second operand, __U as the mask, and _MM_FROUND_CUR_DIRECTION.
   Presumably lanes whose bit in __U is clear keep the value from __W --
   confirm against the builtin.  */
13673extern __inline __m512
13674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13675_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
756c5857 13676{
79fb4764
HJ
13677 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
13678 (__v16sf) __W,
13679 (__mmask16) __U,
13680 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13681}
13682
/* Calls __builtin_ia32_sqrtps512_mask on __A (cast to __v16sf) with
   _mm512_setzero_ps () as the second operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
13683extern __inline __m512
13684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13685_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
756c5857 13686{
79fb4764
HJ
13687 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
13688 (__v16sf)
13689 _mm512_setzero_ps (),
13690 (__mmask16) __U,
13691 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13692}
13693
/* Returns (__v8df) __A + (__v8df) __B, cast back to __m512d.  Written
   with the plain + operator; no builtin or mask is involved.  */
79fb4764 13694extern __inline __m512d
075691af 13695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13696_mm512_add_pd (__m512d __A, __m512d __B)
075691af 13697{
79fb4764 13698 return (__m512d) ((__v8df)__A + (__v8df)__B);
075691af
AI
13699}
13700
/* Calls __builtin_ia32_addpd512_mask on __A and __B (cast to __v8df) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
79fb4764 13701extern __inline __m512d
075691af 13702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13703_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
075691af 13704{
79fb4764
HJ
13705 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
13706 (__v8df) __B,
13707 (__v8df) __W,
13708 (__mmask8) __U,
13709 _MM_FROUND_CUR_DIRECTION);
075691af
AI
13710}
13711
756c5857
AI
/* Calls __builtin_ia32_addpd512_mask on __A and __B (cast to __v8df) with
   _mm512_setzero_pd () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
13712extern __inline __m512d
13713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
13714_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
13715{
13716 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
13717 (__v8df) __B,
13718 (__v8df)
13719 _mm512_setzero_pd (),
13720 (__mmask8) __U,
13721 _MM_FROUND_CUR_DIRECTION);
13722}
13723
/* Returns (__v16sf) __A + (__v16sf) __B, cast back to __m512.  Written
   with the plain + operator; no builtin or mask is involved.  */
13724extern __inline __m512
13725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13726_mm512_add_ps (__m512 __A, __m512 __B)
13727{
13728 return (__m512) ((__v16sf)__A + (__v16sf)__B);
13729}
13730
/* Calls __builtin_ia32_addps512_mask on __A and __B (cast to __v16sf) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
13731extern __inline __m512
13732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13733_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
756c5857 13734{
79fb4764
HJ
13735 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
13736 (__v16sf) __B,
13737 (__v16sf) __W,
13738 (__mmask16) __U,
13739 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13740}
13741
/* Calls __builtin_ia32_addps512_mask on __A and __B (cast to __v16sf) with
   _mm512_setzero_ps () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
79fb4764 13742extern __inline __m512
756c5857 13743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13744_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
756c5857 13745{
79fb4764
HJ
13746 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
13747 (__v16sf) __B,
13748 (__v16sf)
13749 _mm512_setzero_ps (),
13750 (__mmask16) __U,
13751 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13752}
13753
/* Returns (__v8df) __A - (__v8df) __B, cast back to __m512d.  Written
   with the plain - operator; no builtin or mask is involved.  */
13754extern __inline __m512d
13755__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13756_mm512_sub_pd (__m512d __A, __m512d __B)
756c5857 13757{
79fb4764 13758 return (__m512d) ((__v8df)__A - (__v8df)__B);
756c5857
AI
13759}
13760
/* Calls __builtin_ia32_subpd512_mask on __A and __B (cast to __v8df) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
13761extern __inline __m512d
13762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13763_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
756c5857 13764{
79fb4764
HJ
13765 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
13766 (__v8df) __B,
13767 (__v8df) __W,
13768 (__mmask8) __U,
13769 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13770}
13771
/* Calls __builtin_ia32_subpd512_mask on __A and __B (cast to __v8df) with
   _mm512_setzero_pd () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
79fb4764 13772extern __inline __m512d
756c5857 13773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13774_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
756c5857 13775{
79fb4764
HJ
13776 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
13777 (__v8df) __B,
13778 (__v8df)
13779 _mm512_setzero_pd (),
13780 (__mmask8) __U,
13781 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13782}
13783
/* Returns (__v16sf) __A - (__v16sf) __B, cast back to __m512.  Written
   with the plain - operator; no builtin or mask is involved.  */
13784extern __inline __m512
13785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13786_mm512_sub_ps (__m512 __A, __m512 __B)
756c5857 13787{
79fb4764 13788 return (__m512) ((__v16sf)__A - (__v16sf)__B);
756c5857
AI
13789}
13790
/* Calls __builtin_ia32_subps512_mask on __A and __B (cast to __v16sf) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
13791extern __inline __m512
13792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13793_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
756c5857 13794{
79fb4764
HJ
13795 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
13796 (__v16sf) __B,
13797 (__v16sf) __W,
13798 (__mmask16) __U,
13799 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13800}
13801
/* Calls __builtin_ia32_subps512_mask on __A and __B (cast to __v16sf) with
   _mm512_setzero_ps () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
13802extern __inline __m512
13803__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13804_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
756c5857 13805{
79fb4764
HJ
13806 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
13807 (__v16sf) __B,
13808 (__v16sf)
13809 _mm512_setzero_ps (),
13810 (__mmask16) __U,
13811 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13812}
13813
/* Returns (__v8df) __A * (__v8df) __B, cast back to __m512d.  Written
   with the plain * operator; no builtin or mask is involved.  */
13814extern __inline __m512d
13815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13816_mm512_mul_pd (__m512d __A, __m512d __B)
756c5857 13817{
79fb4764 13818 return (__m512d) ((__v8df)__A * (__v8df)__B);
756c5857
AI
13819}
13820
/* Calls __builtin_ia32_mulpd512_mask on __A and __B (cast to __v8df) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
13821extern __inline __m512d
13822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13823_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
756c5857 13824{
79fb4764
HJ
13825 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
13826 (__v8df) __B,
13827 (__v8df) __W,
13828 (__mmask8) __U,
13829 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13830}
13831
/* Calls __builtin_ia32_mulpd512_mask on __A and __B (cast to __v8df) with
   _mm512_setzero_pd () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
13832extern __inline __m512d
13833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13834_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
756c5857 13835{
79fb4764
HJ
13836 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
13837 (__v8df) __B,
13838 (__v8df)
13839 _mm512_setzero_pd (),
13840 (__mmask8) __U,
13841 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13842}
13843
/* Returns (__v16sf) __A * (__v16sf) __B, cast back to __m512.  Written
   with the plain * operator; no builtin or mask is involved.  */
79fb4764 13844extern __inline __m512
756c5857 13845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13846_mm512_mul_ps (__m512 __A, __m512 __B)
756c5857 13847{
79fb4764 13848 return (__m512) ((__v16sf)__A * (__v16sf)__B);
756c5857
AI
13849}
13850
/* Calls __builtin_ia32_mulps512_mask on __A and __B (cast to __v16sf) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
13851extern __inline __m512
13852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13853_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
756c5857 13854{
79fb4764
HJ
13855 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
13856 (__v16sf) __B,
13857 (__v16sf) __W,
13858 (__mmask16) __U,
13859 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13860}
13861
/* Calls __builtin_ia32_mulps512_mask on __A and __B (cast to __v16sf) with
   _mm512_setzero_ps () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
13862extern __inline __m512
13863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13864_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
756c5857 13865{
79fb4764
HJ
13866 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
13867 (__v16sf) __B,
13868 (__v16sf)
13869 _mm512_setzero_ps (),
13870 (__mmask16) __U,
13871 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13872}
13873
/* Returns (__v8df) __M / (__v8df) __V, cast back to __m512d.  Note that
   __M is the dividend vector here, not a mask; no builtin or mask is
   involved.  */
79fb4764 13874extern __inline __m512d
756c5857 13875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13876_mm512_div_pd (__m512d __M, __m512d __V)
756c5857 13877{
79fb4764 13878 return (__m512d) ((__v8df)__M / (__v8df)__V);
756c5857
AI
13879}
13880
/* Calls __builtin_ia32_divpd512_mask on __M and __V (cast to __v8df) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Here __M is a vector operand; the mask is
   __U.  Presumably lanes whose bit in __U is clear keep the value from
   __W -- confirm against the builtin.  */
79fb4764 13881extern __inline __m512d
756c5857 13882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13883_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
756c5857 13884{
79fb4764
HJ
13885 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
13886 (__v8df) __V,
13887 (__v8df) __W,
13888 (__mmask8) __U,
13889 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13890}
13891
/* Calls __builtin_ia32_divpd512_mask on __M and __V (cast to __v8df) with
   _mm512_setzero_pd () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Here __M is a vector operand; the mask is
   __U.  Presumably lanes whose bit in __U is clear come out as zero --
   confirm against the builtin.  */
13892extern __inline __m512d
13893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13894_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
756c5857 13895{
79fb4764
HJ
13896 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
13897 (__v8df) __V,
13898 (__v8df)
13899 _mm512_setzero_pd (),
13900 (__mmask8) __U,
13901 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13902}
13903
/* Returns (__v16sf) __A / (__v16sf) __B, cast back to __m512.  Written
   with the plain / operator; no builtin or mask is involved.  */
79fb4764 13904extern __inline __m512
756c5857 13905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13906_mm512_div_ps (__m512 __A, __m512 __B)
756c5857 13907{
79fb4764 13908 return (__m512) ((__v16sf)__A / (__v16sf)__B);
756c5857
AI
13909}
13910
/* Calls __builtin_ia32_divps512_mask on __A and __B (cast to __v16sf) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
79fb4764 13911extern __inline __m512
756c5857 13912__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13913_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
756c5857 13914{
79fb4764
HJ
13915 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
13916 (__v16sf) __B,
13917 (__v16sf) __W,
13918 (__mmask16) __U,
13919 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13920}
13921
/* Calls __builtin_ia32_divps512_mask on __A and __B (cast to __v16sf) with
   _mm512_setzero_ps () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
79fb4764 13922extern __inline __m512
756c5857 13923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13924_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
756c5857 13925{
79fb4764
HJ
13926 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
13927 (__v16sf) __B,
13928 (__v16sf)
13929 _mm512_setzero_ps (),
13930 (__mmask16) __U,
13931 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13932}
13933
/* Calls __builtin_ia32_maxpd512_mask on __A and __B (cast to __v8df) with
   _mm512_undefined_pd () as the third operand, an all-ones __mmask8
   mask, and _MM_FROUND_CUR_DIRECTION as the final argument.  */
79fb4764 13934extern __inline __m512d
756c5857 13935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13936_mm512_max_pd (__m512d __A, __m512d __B)
756c5857 13937{
79fb4764
HJ
13938 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
13939 (__v8df) __B,
13940 (__v8df)
13941 _mm512_undefined_pd (),
13942 (__mmask8) -1,
13943 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13944}
13945
/* Calls __builtin_ia32_maxpd512_mask on __A and __B (cast to __v8df) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
79fb4764 13946extern __inline __m512d
756c5857 13947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13948_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
756c5857 13949{
79fb4764
HJ
13950 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
13951 (__v8df) __B,
13952 (__v8df) __W,
13953 (__mmask8) __U,
13954 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13955}
13956
/* Calls __builtin_ia32_maxpd512_mask on __A and __B (cast to __v8df) with
   _mm512_setzero_pd () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
79fb4764 13957extern __inline __m512d
756c5857 13958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13959_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
756c5857 13960{
79fb4764
HJ
13961 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
13962 (__v8df) __B,
13963 (__v8df)
13964 _mm512_setzero_pd (),
13965 (__mmask8) __U,
13966 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13967}
13968
/* Calls __builtin_ia32_maxps512_mask on __A and __B (cast to __v16sf) with
   _mm512_undefined_ps () as the third operand, an all-ones __mmask16
   mask, and _MM_FROUND_CUR_DIRECTION as the final argument.  */
13969extern __inline __m512
13970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13971_mm512_max_ps (__m512 __A, __m512 __B)
756c5857 13972{
79fb4764
HJ
13973 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
13974 (__v16sf) __B,
13975 (__v16sf)
13976 _mm512_undefined_ps (),
13977 (__mmask16) -1,
13978 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13979}
13980
/* Calls __builtin_ia32_maxps512_mask on __A and __B (cast to __v16sf) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
79fb4764 13981extern __inline __m512
756c5857 13982__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13983_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
756c5857 13984{
79fb4764
HJ
13985 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
13986 (__v16sf) __B,
13987 (__v16sf) __W,
13988 (__mmask16) __U,
13989 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
13990}
13991
/* Calls __builtin_ia32_maxps512_mask on __A and __B (cast to __v16sf) with
   _mm512_setzero_ps () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
79fb4764 13992extern __inline __m512
756c5857 13993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 13994_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
756c5857 13995{
79fb4764
HJ
13996 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
13997 (__v16sf) __B,
13998 (__v16sf)
13999 _mm512_setzero_ps (),
14000 (__mmask16) __U,
14001 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14002}
14003
/* Calls __builtin_ia32_minpd512_mask on __A and __B (cast to __v8df) with
   _mm512_undefined_pd () as the third operand, an all-ones __mmask8
   mask, and _MM_FROUND_CUR_DIRECTION as the final argument.  */
14004extern __inline __m512d
14005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14006_mm512_min_pd (__m512d __A, __m512d __B)
756c5857 14007{
79fb4764
HJ
14008 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
14009 (__v8df) __B,
14010 (__v8df)
14011 _mm512_undefined_pd (),
14012 (__mmask8) -1,
14013 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14014}
14015
/* Calls __builtin_ia32_minpd512_mask on __A and __B (cast to __v8df) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
14016extern __inline __m512d
14017__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14018_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
756c5857 14019{
79fb4764
HJ
14020 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
14021 (__v8df) __B,
14022 (__v8df) __W,
14023 (__mmask8) __U,
14024 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14025}
14026
/* Calls __builtin_ia32_minpd512_mask on __A and __B (cast to __v8df) with
   _mm512_setzero_pd () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
79fb4764 14027extern __inline __m512d
756c5857 14028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14029_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
756c5857 14030{
79fb4764
HJ
14031 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
14032 (__v8df) __B,
14033 (__v8df)
14034 _mm512_setzero_pd (),
14035 (__mmask8) __U,
14036 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14037}
14038
/* Calls __builtin_ia32_minps512_mask on __A and __B (cast to __v16sf) with
   _mm512_undefined_ps () as the third operand, an all-ones __mmask16
   mask, and _MM_FROUND_CUR_DIRECTION as the final argument.  */
14039extern __inline __m512
14040__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14041_mm512_min_ps (__m512 __A, __m512 __B)
756c5857 14042{
79fb4764
HJ
14043 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
14044 (__v16sf) __B,
14045 (__v16sf)
14046 _mm512_undefined_ps (),
14047 (__mmask16) -1,
14048 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14049}
14050
/* Calls __builtin_ia32_minps512_mask on __A and __B (cast to __v16sf) with
   __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
14051extern __inline __m512
14052__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14053_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
756c5857 14054{
79fb4764
HJ
14055 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
14056 (__v16sf) __B,
14057 (__v16sf) __W,
14058 (__mmask16) __U,
14059 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14060}
14061
/* Calls __builtin_ia32_minps512_mask on __A and __B (cast to __v16sf) with
   _mm512_setzero_ps () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
14062extern __inline __m512
14063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14064_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
756c5857 14065{
79fb4764
HJ
14066 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
14067 (__v16sf) __B,
14068 (__v16sf)
14069 _mm512_setzero_ps (),
14070 (__mmask16) __U,
14071 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14072}
14073
/* Calls __builtin_ia32_scalefpd512_mask on __A and __B (cast to __v8df)
   with _mm512_undefined_pd () as the third operand, an all-ones __mmask8
   mask, and _MM_FROUND_CUR_DIRECTION.  The scaling computation itself is
   defined by the builtin, not by this wrapper.  */
14074extern __inline __m512d
14075__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14076_mm512_scalef_pd (__m512d __A, __m512d __B)
756c5857 14077{
79fb4764
HJ
14078 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
14079 (__v8df) __B,
14080 (__v8df)
14081 _mm512_undefined_pd (),
14082 (__mmask8) -1,
14083 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14084}
14085
/* Calls __builtin_ia32_scalefpd512_mask on __A and __B (cast to __v8df)
   with __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
14086extern __inline __m512d
14087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14088_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
756c5857 14089{
79fb4764
HJ
14090 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
14091 (__v8df) __B,
14092 (__v8df) __W,
14093 (__mmask8) __U,
14094 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14095}
14096
/* Calls __builtin_ia32_scalefpd512_mask on __A and __B (cast to __v8df)
   with _mm512_setzero_pd () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
14097extern __inline __m512d
14098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14099_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
756c5857 14100{
79fb4764
HJ
14101 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
14102 (__v8df) __B,
14103 (__v8df)
14104 _mm512_setzero_pd (),
14105 (__mmask8) __U,
14106 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14107}
14108
/* Calls __builtin_ia32_scalefps512_mask on __A and __B (cast to __v16sf)
   with _mm512_undefined_ps () as the third operand, an all-ones
   __mmask16 mask, and _MM_FROUND_CUR_DIRECTION.  The scaling computation
   itself is defined by the builtin, not by this wrapper.  */
79fb4764 14109extern __inline __m512
756c5857 14110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14111_mm512_scalef_ps (__m512 __A, __m512 __B)
756c5857 14112{
79fb4764
HJ
14113 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
14114 (__v16sf) __B,
14115 (__v16sf)
14116 _mm512_undefined_ps (),
14117 (__mmask16) -1,
14118 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14119}
14120
/* Calls __builtin_ia32_scalefps512_mask on __A and __B (cast to __v16sf)
   with __W as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   keep the value from __W -- confirm against the builtin.  */
14121extern __inline __m512
14122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14123_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
756c5857 14124{
79fb4764
HJ
14125 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
14126 (__v16sf) __B,
14127 (__v16sf) __W,
14128 (__mmask16) __U,
14129 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14130}
14131
/* Calls __builtin_ia32_scalefps512_mask on __A and __B (cast to __v16sf)
   with _mm512_setzero_ps () as the third operand, __U as the mask, and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
14132extern __inline __m512
14133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14134_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
756c5857 14135{
79fb4764
HJ
14136 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
14137 (__v16sf) __B,
14138 (__v16sf)
14139 _mm512_setzero_ps (),
14140 (__mmask16) __U,
14141 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14142}
14143
/* Calls __builtin_ia32_vfmaddpd512_mask on __A, __B and __C (cast to
   __v8df) with an all-ones __mmask8 mask and _MM_FROUND_CUR_DIRECTION as
   the final argument.  */
79fb4764 14144extern __inline __m512d
756c5857 14145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14146_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
756c5857 14147{
79fb4764
HJ
14148 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
14149 (__v8df) __B,
14150 (__v8df) __C,
14151 (__mmask8) -1,
14152 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14153}
14154
/* Calls __builtin_ia32_vfmaddpd512_mask on __A, __B and __C (cast to
   __v8df) with __U as the mask and _MM_FROUND_CUR_DIRECTION.  There is
   no separate __W parameter; presumably lanes whose bit in __U is clear
   keep the value from __A -- confirm against the builtin.  */
79fb4764 14155extern __inline __m512d
756c5857 14156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14157_mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
756c5857 14158{
79fb4764
HJ
14159 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
14160 (__v8df) __B,
14161 (__v8df) __C,
14162 (__mmask8) __U,
14163 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14164}
14165
/* Calls __builtin_ia32_vfmaddpd512_mask3 (the _mask3 builtin) on __A, __B
   and __C (cast to __v8df) with __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  The mask is the last parameter here;
   presumably lanes whose bit in __U is clear keep the value from __C --
   confirm against the builtin.  */
14166extern __inline __m512d
14167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14168_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
756c5857 14169{
79fb4764 14170 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
38ef6fb1
L
14171 (__v8df) __B,
14172 (__v8df) __C,
79fb4764 14173 (__mmask8) __U,
38ef6fb1 14174 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14175}
14176
/* Calls __builtin_ia32_vfmaddpd512_maskz (the _maskz builtin) on __A, __B
   and __C (cast to __v8df) with __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
14177extern __inline __m512d
14178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14179_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
756c5857 14180{
79fb4764 14181 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
756c5857
AI
14182 (__v8df) __B,
14183 (__v8df) __C,
14184 (__mmask8) __U,
14185 _MM_FROUND_CUR_DIRECTION);
14186}
14187
/* Calls __builtin_ia32_vfmaddps512_mask on __A, __B and __C (cast to
   __v16sf) with an all-ones __mmask16 mask and _MM_FROUND_CUR_DIRECTION
   as the final argument.  */
79fb4764 14188extern __inline __m512
756c5857 14189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14190_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
756c5857 14191{
79fb4764
HJ
14192 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
14193 (__v16sf) __B,
14194 (__v16sf) __C,
14195 (__mmask16) -1,
14196 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14197}
14198
/* Calls __builtin_ia32_vfmaddps512_mask on __A, __B and __C (cast to
   __v16sf) with __U as the mask and _MM_FROUND_CUR_DIRECTION.  There is
   no separate __W parameter; presumably lanes whose bit in __U is clear
   keep the value from __A -- confirm against the builtin.  */
79fb4764 14199extern __inline __m512
756c5857 14200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14201_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
756c5857 14202{
79fb4764
HJ
14203 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
14204 (__v16sf) __B,
14205 (__v16sf) __C,
14206 (__mmask16) __U,
14207 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14208}
14209
/* Calls __builtin_ia32_vfmaddps512_mask3 (the _mask3 builtin) on __A, __B
   and __C (cast to __v16sf) with __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  The mask is the last parameter here;
   presumably lanes whose bit in __U is clear keep the value from __C --
   confirm against the builtin.  */
14210extern __inline __m512
14211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14212_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
756c5857 14213{
79fb4764 14214 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
38ef6fb1
L
14215 (__v16sf) __B,
14216 (__v16sf) __C,
79fb4764 14217 (__mmask16) __U,
38ef6fb1 14218 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14219}
14220
/* Calls __builtin_ia32_vfmaddps512_maskz (the _maskz builtin) on __A, __B
   and __C (cast to __v16sf) with __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  Presumably lanes whose bit in __U is clear
   come out as zero -- confirm against the builtin.  */
14221extern __inline __m512
14222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14223_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
756c5857 14224{
79fb4764 14225 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
756c5857
AI
14226 (__v16sf) __B,
14227 (__v16sf) __C,
14228 (__mmask16) __U,
14229 _MM_FROUND_CUR_DIRECTION);
14230}
14231
/* Calls __builtin_ia32_vfmsubpd512_mask on __A, __B and __C (cast to
   __v8df) with an all-ones __mmask8 mask and _MM_FROUND_CUR_DIRECTION as
   the final argument.  */
79fb4764 14232extern __inline __m512d
756c5857 14233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14234_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
756c5857 14235{
79fb4764
HJ
14236 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
14237 (__v8df) __B,
14238 (__v8df) __C,
14239 (__mmask8) -1,
14240 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14241}
14242
/* Calls __builtin_ia32_vfmsubpd512_mask on __A, __B and __C (cast to
   __v8df) with __U as the mask and _MM_FROUND_CUR_DIRECTION.  There is
   no separate __W parameter; presumably lanes whose bit in __U is clear
   keep the value from __A -- confirm against the builtin.  */
79fb4764 14243extern __inline __m512d
756c5857 14244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14245_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
756c5857 14246{
79fb4764
HJ
14247 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
14248 (__v8df) __B,
14249 (__v8df) __C,
14250 (__mmask8) __U,
14251 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14252}
14253
/* Calls __builtin_ia32_vfmsubpd512_mask3 (the _mask3 builtin) on __A, __B
   and __C (cast to __v8df) with __U as the mask and
   _MM_FROUND_CUR_DIRECTION.  The mask is the last parameter here;
   presumably lanes whose bit in __U is clear keep the value from __C --
   confirm against the builtin.  */
79fb4764 14254extern __inline __m512d
756c5857 14255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14256_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
756c5857 14257{
79fb4764
HJ
14258 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
14259 (__v8df) __B,
14260 (__v8df) __C,
14261 (__mmask8) __U,
756c5857
AI
14262 _MM_FROUND_CUR_DIRECTION);
14263}
14264
79fb4764 14265extern __inline __m512d
756c5857 14266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14267_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
756c5857 14268{
79fb4764
HJ
14269 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
14270 (__v8df) __B,
14271 (__v8df) __C,
756c5857
AI
14272 (__mmask8) __U,
14273 _MM_FROUND_CUR_DIRECTION);
14274}
14275
79fb4764 14276extern __inline __m512
756c5857 14277__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14278_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
756c5857 14279{
79fb4764
HJ
14280 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
14281 (__v16sf) __B,
14282 (__v16sf) __C,
14283 (__mmask16) -1,
14284 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14285}
14286
79fb4764 14287extern __inline __m512
756c5857 14288__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14289_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
756c5857 14290{
79fb4764
HJ
14291 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
14292 (__v16sf) __B,
14293 (__v16sf) __C,
14294 (__mmask16) __U,
14295 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14296}
14297
79fb4764 14298extern __inline __m512
756c5857 14299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14300_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
756c5857 14301{
79fb4764
HJ
14302 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
14303 (__v16sf) __B,
14304 (__v16sf) __C,
14305 (__mmask16) __U,
14306 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14307}
14308
79fb4764 14309extern __inline __m512
756c5857 14310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14311_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
756c5857 14312{
79fb4764
HJ
14313 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
14314 (__v16sf) __B,
14315 (__v16sf) __C,
14316 (__mmask16) __U,
14317 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14318}
14319
79fb4764 14320extern __inline __m512d
756c5857 14321__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14322_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
756c5857 14323{
79fb4764
HJ
14324 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
14325 (__v8df) __B,
14326 (__v8df) __C,
14327 (__mmask8) -1,
14328 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14329}
14330
79fb4764 14331extern __inline __m512d
756c5857 14332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14333_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
756c5857 14334{
79fb4764
HJ
14335 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
14336 (__v8df) __B,
14337 (__v8df) __C,
14338 (__mmask8) __U,
14339 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14340}
14341
79fb4764 14342extern __inline __m512d
756c5857 14343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14344_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
756c5857 14345{
79fb4764
HJ
14346 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
14347 (__v8df) __B,
14348 (__v8df) __C,
14349 (__mmask8) __U,
14350 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14351}
14352
79fb4764 14353extern __inline __m512d
756c5857 14354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14355_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
756c5857 14356{
79fb4764
HJ
14357 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
14358 (__v8df) __B,
14359 (__v8df) __C,
14360 (__mmask8) __U,
14361 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14362}
14363
79fb4764 14364extern __inline __m512
756c5857 14365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14366_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
756c5857 14367{
79fb4764
HJ
14368 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
14369 (__v16sf) __B,
14370 (__v16sf) __C,
14371 (__mmask16) -1,
14372 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14373}
14374
79fb4764 14375extern __inline __m512
756c5857 14376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14377_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
756c5857 14378{
79fb4764
HJ
14379 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
14380 (__v16sf) __B,
14381 (__v16sf) __C,
14382 (__mmask16) __U,
14383 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14384}
14385
79fb4764
HJ
14386extern __inline __m512
14387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14388_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
14389{
14390 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
14391 (__v16sf) __B,
14392 (__v16sf) __C,
14393 (__mmask16) __U,
14394 _MM_FROUND_CUR_DIRECTION);
14395}
14396
14397extern __inline __m512
14398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14399_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
14400{
14401 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
14402 (__v16sf) __B,
14403 (__v16sf) __C,
14404 (__mmask16) __U,
14405 _MM_FROUND_CUR_DIRECTION);
14406}
14407
14408extern __inline __m512d
756c5857 14409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14410_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
756c5857 14411{
79fb4764
HJ
14412 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
14413 (__v8df) __B,
14414 -(__v8df) __C,
14415 (__mmask8) -1,
14416 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14417}
14418
79fb4764 14419extern __inline __m512d
756c5857 14420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14421_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
756c5857 14422{
79fb4764
HJ
14423 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
14424 (__v8df) __B,
14425 -(__v8df) __C,
14426 (__mmask8) __U,
14427 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14428}
14429
79fb4764 14430extern __inline __m512d
756c5857 14431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14432_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
756c5857 14433{
79fb4764
HJ
14434 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
14435 (__v8df) __B,
14436 (__v8df) __C,
14437 (__mmask8) __U,
14438 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14439}
14440
79fb4764 14441extern __inline __m512d
756c5857 14442__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14443_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
756c5857 14444{
79fb4764
HJ
14445 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
14446 (__v8df) __B,
14447 -(__v8df) __C,
14448 (__mmask8) __U,
14449 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14450}
14451
79fb4764 14452extern __inline __m512
756c5857 14453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14454_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
756c5857 14455{
79fb4764
HJ
14456 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
14457 (__v16sf) __B,
14458 -(__v16sf) __C,
14459 (__mmask16) -1,
756c5857
AI
14460 _MM_FROUND_CUR_DIRECTION);
14461}
14462
79fb4764 14463extern __inline __m512
756c5857 14464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14465_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
756c5857 14466{
79fb4764
HJ
14467 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
14468 (__v16sf) __B,
14469 -(__v16sf) __C,
756c5857
AI
14470 (__mmask16) __U,
14471 _MM_FROUND_CUR_DIRECTION);
14472}
14473
79fb4764 14474extern __inline __m512
756c5857 14475__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14476_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
756c5857 14477{
79fb4764
HJ
14478 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
14479 (__v16sf) __B,
14480 (__v16sf) __C,
14481 (__mmask16) __U,
14482 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14483}
14484
79fb4764 14485extern __inline __m512
756c5857 14486__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14487_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
756c5857 14488{
79fb4764
HJ
14489 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
14490 (__v16sf) __B,
14491 -(__v16sf) __C,
14492 (__mmask16) __U,
14493 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14494}
14495
79fb4764 14496extern __inline __m512d
756c5857 14497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14498_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
756c5857 14499{
79fb4764
HJ
14500 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
14501 (__v8df) __B,
14502 (__v8df) __C,
14503 (__mmask8) -1,
14504 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14505}
14506
79fb4764 14507extern __inline __m512d
756c5857 14508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14509_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
756c5857 14510{
79fb4764
HJ
14511 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
14512 (__v8df) __B,
14513 (__v8df) __C,
14514 (__mmask8) __U,
756c5857
AI
14515 _MM_FROUND_CUR_DIRECTION);
14516}
14517
79fb4764 14518extern __inline __m512d
756c5857 14519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14520_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
756c5857 14521{
79fb4764
HJ
14522 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
14523 (__v8df) __B,
14524 (__v8df) __C,
14525 (__mmask8) __U,
14526 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14527}
14528
79fb4764 14529extern __inline __m512d
756c5857 14530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14531_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
756c5857 14532{
79fb4764
HJ
14533 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
14534 (__v8df) __B,
14535 (__v8df) __C,
14536 (__mmask8) __U,
14537 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14538}
14539
79fb4764 14540extern __inline __m512
dcb2c527 14541__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14542_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
dcb2c527 14543{
79fb4764
HJ
14544 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
14545 (__v16sf) __B,
14546 (__v16sf) __C,
14547 (__mmask16) -1,
14548 _MM_FROUND_CUR_DIRECTION);
dcb2c527
JJ
14549}
14550
79fb4764 14551extern __inline __m512
dcb2c527 14552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14553_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
dcb2c527 14554{
79fb4764
HJ
14555 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
14556 (__v16sf) __B,
14557 (__v16sf) __C,
14558 (__mmask16) __U,
14559 _MM_FROUND_CUR_DIRECTION);
dcb2c527
JJ
14560}
14561
79fb4764 14562extern __inline __m512
756c5857 14563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14564_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
756c5857 14565{
79fb4764
HJ
14566 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
14567 (__v16sf) __B,
14568 (__v16sf) __C,
14569 (__mmask16) __U,
14570 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14571}
14572
79fb4764 14573extern __inline __m512
756c5857 14574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14575_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
756c5857 14576{
79fb4764
HJ
14577 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
14578 (__v16sf) __B,
14579 (__v16sf) __C,
14580 (__mmask16) __U,
14581 _MM_FROUND_CUR_DIRECTION);
756c5857 14582}
756c5857 14583
79fb4764 14584extern __inline __m512d
756c5857 14585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14586_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
756c5857 14587{
79fb4764
HJ
14588 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
14589 (__v8df) __B,
14590 (__v8df) __C,
14591 (__mmask8) -1,
14592 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14593}
14594
79fb4764 14595extern __inline __m512d
756c5857 14596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14597_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
756c5857 14598{
79fb4764
HJ
14599 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
14600 (__v8df) __B,
14601 (__v8df) __C,
14602 (__mmask8) __U,
14603 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14604}
14605
79fb4764 14606extern __inline __m512d
756c5857 14607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14608_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
756c5857 14609{
79fb4764
HJ
14610 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
14611 (__v8df) __B,
14612 (__v8df) __C,
14613 (__mmask8) __U,
14614 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14615}
14616
79fb4764 14617extern __inline __m512d
756c5857 14618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14619_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
756c5857 14620{
79fb4764
HJ
14621 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
14622 (__v8df) __B,
14623 (__v8df) __C,
14624 (__mmask8) __U,
14625 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14626}
14627
14628extern __inline __m512
14629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14630_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
756c5857 14631{
79fb4764
HJ
14632 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
14633 (__v16sf) __B,
14634 (__v16sf) __C,
756c5857
AI
14635 (__mmask16) -1,
14636 _MM_FROUND_CUR_DIRECTION);
14637}
14638
14639extern __inline __m512
14640__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14641_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
756c5857 14642{
79fb4764
HJ
14643 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
14644 (__v16sf) __B,
14645 (__v16sf) __C,
756c5857
AI
14646 (__mmask16) __U,
14647 _MM_FROUND_CUR_DIRECTION);
14648}
14649
14650extern __inline __m512
14651__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14652_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
756c5857 14653{
79fb4764
HJ
14654 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
14655 (__v16sf) __B,
14656 (__v16sf) __C,
14657 (__mmask16) __U,
14658 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14659}
14660
79fb4764 14661extern __inline __m512
756c5857 14662__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14663_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
756c5857 14664{
79fb4764
HJ
14665 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
14666 (__v16sf) __B,
14667 (__v16sf) __C,
14668 (__mmask16) __U,
14669 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14670}
14671
79fb4764 14672extern __inline __m256i
756c5857 14673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14674_mm512_cvttpd_epi32 (__m512d __A)
756c5857 14675{
79fb4764
HJ
14676 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
14677 (__v8si)
14678 _mm256_undefined_si256 (),
14679 (__mmask8) -1,
14680 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14681}
14682
79fb4764 14683extern __inline __m256i
756c5857 14684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14685_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
756c5857 14686{
79fb4764
HJ
14687 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
14688 (__v8si) __W,
14689 (__mmask8) __U,
14690 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14691}
14692
79fb4764 14693extern __inline __m256i
756c5857 14694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14695_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
756c5857 14696{
79fb4764
HJ
14697 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
14698 (__v8si)
14699 _mm256_setzero_si256 (),
14700 (__mmask8) __U,
756c5857
AI
14701 _MM_FROUND_CUR_DIRECTION);
14702}
14703
79fb4764 14704extern __inline __m256i
756c5857 14705__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14706_mm512_cvttpd_epu32 (__m512d __A)
756c5857 14707{
79fb4764
HJ
14708 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
14709 (__v8si)
14710 _mm256_undefined_si256 (),
14711 (__mmask8) -1,
14712 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14713}
14714
79fb4764 14715extern __inline __m256i
756c5857 14716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14717_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
756c5857 14718{
79fb4764
HJ
14719 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
14720 (__v8si) __W,
14721 (__mmask8) __U,
756c5857
AI
14722 _MM_FROUND_CUR_DIRECTION);
14723}
14724
79fb4764 14725extern __inline __m256i
756c5857 14726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14727_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
756c5857 14728{
79fb4764
HJ
14729 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
14730 (__v8si)
14731 _mm256_setzero_si256 (),
14732 (__mmask8) __U,
14733 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14734}
14735
79fb4764 14736extern __inline __m256i
756c5857 14737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14738_mm512_cvtpd_epi32 (__m512d __A)
756c5857 14739{
79fb4764
HJ
14740 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
14741 (__v8si)
14742 _mm256_undefined_si256 (),
14743 (__mmask8) -1,
14744 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14745}
14746
79fb4764 14747extern __inline __m256i
756c5857 14748__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14749_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
756c5857 14750{
79fb4764
HJ
14751 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
14752 (__v8si) __W,
756c5857
AI
14753 (__mmask8) __U,
14754 _MM_FROUND_CUR_DIRECTION);
14755}
14756
79fb4764 14757extern __inline __m256i
756c5857 14758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14759_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
756c5857 14760{
79fb4764
HJ
14761 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
14762 (__v8si)
14763 _mm256_setzero_si256 (),
14764 (__mmask8) __U,
14765 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14766}
14767
79fb4764 14768extern __inline __m256i
756c5857 14769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14770_mm512_cvtpd_epu32 (__m512d __A)
756c5857 14771{
79fb4764
HJ
14772 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
14773 (__v8si)
14774 _mm256_undefined_si256 (),
14775 (__mmask8) -1,
14776 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14777}
14778
79fb4764 14779extern __inline __m256i
756c5857 14780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14781_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
756c5857 14782{
79fb4764
HJ
14783 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
14784 (__v8si) __W,
14785 (__mmask8) __U,
14786 _MM_FROUND_CUR_DIRECTION);
756c5857 14787}
756c5857 14788
79fb4764
HJ
14789extern __inline __m256i
14790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14791_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
14792{
14793 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
14794 (__v8si)
14795 _mm256_setzero_si256 (),
14796 (__mmask8) __U,
14797 _MM_FROUND_CUR_DIRECTION);
14798}
756c5857 14799
79fb4764
HJ
14800extern __inline __m512i
14801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14802_mm512_cvttps_epi32 (__m512 __A)
14803{
14804 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
14805 (__v16si)
14806 _mm512_undefined_epi32 (),
14807 (__mmask16) -1,
14808 _MM_FROUND_CUR_DIRECTION);
14809}
756c5857 14810
79fb4764
HJ
14811extern __inline __m512i
14812__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14813_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
14814{
14815 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
14816 (__v16si) __W,
14817 (__mmask16) __U,
14818 _MM_FROUND_CUR_DIRECTION);
14819}
756c5857 14820
79fb4764
HJ
14821extern __inline __m512i
14822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14823_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
14824{
14825 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
14826 (__v16si)
14827 _mm512_setzero_si512 (),
14828 (__mmask16) __U,
14829 _MM_FROUND_CUR_DIRECTION);
14830}
756c5857 14831
79fb4764
HJ
14832extern __inline __m512i
14833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14834_mm512_cvttps_epu32 (__m512 __A)
14835{
14836 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
14837 (__v16si)
14838 _mm512_undefined_epi32 (),
14839 (__mmask16) -1,
14840 _MM_FROUND_CUR_DIRECTION);
14841}
756c5857 14842
79fb4764
HJ
14843extern __inline __m512i
14844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14845_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
14846{
14847 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
14848 (__v16si) __W,
14849 (__mmask16) __U,
14850 _MM_FROUND_CUR_DIRECTION);
14851}
756c5857 14852
79fb4764
HJ
14853extern __inline __m512i
14854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14855_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
14856{
14857 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
14858 (__v16si)
14859 _mm512_setzero_si512 (),
14860 (__mmask16) __U,
14861 _MM_FROUND_CUR_DIRECTION);
14862}
756c5857 14863
79fb4764 14864extern __inline __m512i
756c5857 14865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14866_mm512_cvtps_epi32 (__m512 __A)
756c5857 14867{
79fb4764
HJ
14868 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14869 (__v16si)
14870 _mm512_undefined_epi32 (),
14871 (__mmask16) -1,
14872 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14873}
14874
79fb4764 14875extern __inline __m512i
756c5857 14876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14877_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
756c5857 14878{
79fb4764
HJ
14879 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14880 (__v16si) __W,
14881 (__mmask16) __U,
14882 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14883}
14884
79fb4764 14885extern __inline __m512i
756c5857 14886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14887_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
756c5857 14888{
79fb4764
HJ
14889 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14890 (__v16si)
14891 _mm512_setzero_si512 (),
14892 (__mmask16) __U,
14893 _MM_FROUND_CUR_DIRECTION);
756c5857 14894}
756c5857 14895
79fb4764 14896extern __inline __m512i
93103603 14897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14898_mm512_cvtps_epu32 (__m512 __A)
93103603 14899{
79fb4764
HJ
14900 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14901 (__v16si)
14902 _mm512_undefined_epi32 (),
14903 (__mmask16) -1,
14904 _MM_FROUND_CUR_DIRECTION);
93103603
SP
14905}
14906
79fb4764 14907extern __inline __m512i
756c5857 14908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14909_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
756c5857 14910{
79fb4764
HJ
14911 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14912 (__v16si) __W,
14913 (__mmask16) __U,
14914 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14915}
14916
79fb4764 14917extern __inline __m512i
756c5857 14918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14919_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
756c5857 14920{
79fb4764
HJ
14921 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14922 (__v16si)
14923 _mm512_setzero_si512 (),
14924 (__mmask16) __U,
14925 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14926}
14927
79fb4764 14928extern __inline double
756c5857 14929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14930_mm512_cvtsd_f64 (__m512d __A)
756c5857 14931{
79fb4764 14932 return __A[0];
756c5857
AI
14933}
14934
79fb4764 14935extern __inline float
93103603 14936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14937_mm512_cvtss_f32 (__m512 __A)
93103603 14938{
79fb4764 14939 return __A[0];
93103603
SP
14940}
14941
79fb4764 14942extern __inline __m512
93103603 14943__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14944_mm512_cvtepi32_ps (__m512i __A)
93103603 14945{
79fb4764
HJ
14946 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14947 (__v16sf)
14948 _mm512_undefined_ps (),
14949 (__mmask16) -1,
14950 _MM_FROUND_CUR_DIRECTION);
93103603
SP
14951}
14952
79fb4764 14953extern __inline __m512
93103603 14954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14955_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
93103603 14956{
79fb4764
HJ
14957 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14958 (__v16sf) __W,
14959 (__mmask16) __U,
14960 _MM_FROUND_CUR_DIRECTION);
93103603
SP
14961}
14962
79fb4764 14963extern __inline __m512
93103603 14964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14965_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
93103603 14966{
79fb4764
HJ
14967 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14968 (__v16sf)
14969 _mm512_setzero_ps (),
14970 (__mmask16) __U,
14971 _MM_FROUND_CUR_DIRECTION);
93103603
SP
14972}
14973
79fb4764 14974extern __inline __m512
756c5857 14975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14976_mm512_cvtepu32_ps (__m512i __A)
756c5857 14977{
79fb4764
HJ
14978 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14979 (__v16sf)
14980 _mm512_undefined_ps (),
14981 (__mmask16) -1,
14982 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14983}
14984
79fb4764 14985extern __inline __m512
756c5857 14986__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14987_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
756c5857 14988{
79fb4764
HJ
14989 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14990 (__v16sf) __W,
14991 (__mmask16) __U,
14992 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
14993}
14994
79fb4764 14995extern __inline __m512
756c5857 14996__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 14997_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
756c5857 14998{
79fb4764
HJ
14999 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
15000 (__v16sf)
15001 _mm512_setzero_ps (),
15002 (__mmask16) __U,
15003 _MM_FROUND_CUR_DIRECTION);
756c5857 15004}
93103603 15005
79fb4764
HJ
15006#ifdef __OPTIMIZE__
15007extern __inline __m512d
93103603 15008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 15009_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
93103603 15010{
79fb4764
HJ
15011 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
15012 (__v8df) __B,
15013 (__v8di) __C,
15014 __imm,
15015 (__mmask8) -1,
15016 _MM_FROUND_CUR_DIRECTION);
93103603
SP
15017}
15018
79fb4764 15019extern __inline __m512d
93103603 15020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
15021_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
15022 __m512i __C, const int __imm)
93103603 15023{
79fb4764
HJ
15024 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
15025 (__v8df) __B,
15026 (__v8di) __C,
15027 __imm,
15028 (__mmask8) __U,
15029 _MM_FROUND_CUR_DIRECTION);
93103603
SP
15030}
15031
79fb4764 15032extern __inline __m512d
93103603 15033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
15034_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
15035 __m512i __C, const int __imm)
93103603 15036{
79fb4764
HJ
15037 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
15038 (__v8df) __B,
15039 (__v8di) __C,
15040 __imm,
15041 (__mmask8) __U,
15042 _MM_FROUND_CUR_DIRECTION);
93103603
SP
15043}
15044
79fb4764 15045extern __inline __m512
93103603 15046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 15047_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
93103603 15048{
79fb4764
HJ
15049 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
15050 (__v16sf) __B,
15051 (__v16si) __C,
15052 __imm,
15053 (__mmask16) -1,
15054 _MM_FROUND_CUR_DIRECTION);
93103603 15055}
756c5857 15056
79fb4764 15057extern __inline __m512
756c5857 15058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
15059_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
15060 __m512i __C, const int __imm)
756c5857 15061{
79fb4764
HJ
15062 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
15063 (__v16sf) __B,
15064 (__v16si) __C,
15065 __imm,
15066 (__mmask16) __U,
15067 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
15068}
15069
79fb4764 15070extern __inline __m512
756c5857 15071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764
HJ
15072_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
15073 __m512i __C, const int __imm)
756c5857 15074{
79fb4764
HJ
15075 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
15076 (__v16sf) __B,
15077 (__v16si) __C,
15078 __imm,
15079 (__mmask16) __U,
15080 _MM_FROUND_CUR_DIRECTION);
756c5857
AI
15081}
15082
79fb4764
HJ
15083#else
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_fixupimmpd512_mask with C cast to int, the mask
   (__mmask8)(-1) and _MM_FROUND_CUR_DIRECTION last.  */
#define _mm512_fixupimm_pd(X, Y, Z, C) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), \
      (__v8di)(__m512i)(Z), \
      (int)(C), \
      (__mmask8)(-1), \
      _MM_FROUND_CUR_DIRECTION))
15088
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_fixupimmpd512_mask with C cast to int, mask U and
   _MM_FROUND_CUR_DIRECTION last.  */
#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), \
      (__v8di)(__m512i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
15093
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_fixupimmpd512_maskz with C cast to int, mask U and
   _MM_FROUND_CUR_DIRECTION last.  */
#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ( \
      (__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), \
      (__v8di)(__m512i)(Z), \
      (int)(C), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
15098
/* Macro form used when __OPTIMIZE__ is not defined: expands to a call
   of __builtin_ia32_fixupimmps512_mask with C cast to int, the mask
   (__mmask16)(-1) and _MM_FROUND_CUR_DIRECTION last.  */
#define _mm512_fixupimm_ps(X, Y, Z, C) \
  ((__m512)__builtin_ia32_fixupimmps512_mask ( \
      (__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), \
      (__v16si)(__m512i)(Z), \
      (int)(C), \
      (__mmask16)(-1), \
      _MM_FROUND_CUR_DIRECTION))
15103
15104#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
15105 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
15106 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
15107 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
15108
15109#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
15110 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
15111 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
15112 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
15113
15114#endif
15115
756c5857
AI
15116extern __inline int
15117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
79fb4764 15118_mm512_cvtsi512_si32 (__m512i __A)
756c5857 15119{
79fb4764
HJ
15120 __v16si __B = (__v16si) __A;
15121 return __B[0];
756c5857
AI
15122}
15123
15124extern __inline __m512d
15125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15126_mm512_cvtps_pd (__m256 __A)
15127{
15128 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
15129 (__v8df)
0b192937 15130 _mm512_undefined_pd (),
756c5857
AI
15131 (__mmask8) -1,
15132 _MM_FROUND_CUR_DIRECTION);
15133}
15134
15135extern __inline __m512d
15136__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15137_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
15138{
15139 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
15140 (__v8df) __W,
15141 (__mmask8) __U,
15142 _MM_FROUND_CUR_DIRECTION);
15143}
15144
15145extern __inline __m512d
15146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15147_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
15148{
15149 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
15150 (__v8df)
15151 _mm512_setzero_pd (),
15152 (__mmask8) __U,
15153 _MM_FROUND_CUR_DIRECTION);
15154}
15155
15156extern __inline __m512
15157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15158_mm512_cvtph_ps (__m256i __A)
15159{
15160 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
15161 (__v16sf)
0b192937 15162 _mm512_undefined_ps (),
756c5857
AI
15163 (__mmask16) -1,
15164 _MM_FROUND_CUR_DIRECTION);
15165}
15166
15167extern __inline __m512
15168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15169_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
15170{
15171 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
15172 (__v16sf) __W,
15173 (__mmask16) __U,
15174 _MM_FROUND_CUR_DIRECTION);
15175}
15176
15177extern __inline __m512
15178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15179_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
15180{
15181 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
15182 (__v16sf)
15183 _mm512_setzero_ps (),
15184 (__mmask16) __U,
15185 _MM_FROUND_CUR_DIRECTION);
15186}
15187
15188extern __inline __m256
15189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15190_mm512_cvtpd_ps (__m512d __A)
15191{
15192 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
15193 (__v8sf)
0b192937 15194 _mm256_undefined_ps (),
756c5857
AI
15195 (__mmask8) -1,
15196 _MM_FROUND_CUR_DIRECTION);
15197}
15198
15199extern __inline __m256
15200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15201_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
15202{
15203 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
15204 (__v8sf) __W,
15205 (__mmask8) __U,
15206 _MM_FROUND_CUR_DIRECTION);
15207}
15208
15209extern __inline __m256
15210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15211_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
15212{
15213 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
15214 (__v8sf)
15215 _mm256_setzero_ps (),
15216 (__mmask8) __U,
15217 _MM_FROUND_CUR_DIRECTION);
15218}
15219
15220#ifdef __OPTIMIZE__
15221extern __inline __m512
15222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15223_mm512_getexp_ps (__m512 __A)
15224{
15225 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
15226 (__v16sf)
0b192937 15227 _mm512_undefined_ps (),
756c5857
AI
15228 (__mmask16) -1,
15229 _MM_FROUND_CUR_DIRECTION);
15230}
15231
15232extern __inline __m512
15233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15234_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
15235{
15236 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
15237 (__v16sf) __W,
15238 (__mmask16) __U,
15239 _MM_FROUND_CUR_DIRECTION);
15240}
15241
15242extern __inline __m512
15243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15244_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
15245{
15246 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
15247 (__v16sf)
15248 _mm512_setzero_ps (),
15249 (__mmask16) __U,
15250 _MM_FROUND_CUR_DIRECTION);
15251}
15252
15253extern __inline __m512d
15254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15255_mm512_getexp_pd (__m512d __A)
15256{
15257 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
15258 (__v8df)
0b192937 15259 _mm512_undefined_pd (),
756c5857
AI
15260 (__mmask8) -1,
15261 _MM_FROUND_CUR_DIRECTION);
15262}
15263
15264extern __inline __m512d
15265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15266_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
15267{
15268 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
15269 (__v8df) __W,
15270 (__mmask8) __U,
15271 _MM_FROUND_CUR_DIRECTION);
15272}
15273
15274extern __inline __m512d
15275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15276_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
15277{
15278 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
15279 (__v8df)
15280 _mm512_setzero_pd (),
15281 (__mmask8) __U,
15282 _MM_FROUND_CUR_DIRECTION);
15283}
15284
15285extern __inline __m512d
15286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15287_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
15288 _MM_MANTISSA_SIGN_ENUM __C)
15289{
15290 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
15291 (__C << 2) | __B,
0b192937 15292 _mm512_undefined_pd (),
756c5857
AI
15293 (__mmask8) -1,
15294 _MM_FROUND_CUR_DIRECTION);
15295}
15296
15297extern __inline __m512d
15298__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15299_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
15300 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
15301{
15302 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
15303 (__C << 2) | __B,
15304 (__v8df) __W, __U,
15305 _MM_FROUND_CUR_DIRECTION);
15306}
15307
15308extern __inline __m512d
15309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15310_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
15311 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
15312{
15313 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
15314 (__C << 2) | __B,
15315 (__v8df)
15316 _mm512_setzero_pd (),
15317 __U,
15318 _MM_FROUND_CUR_DIRECTION);
15319}
15320
15321extern __inline __m512
15322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15323_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
15324 _MM_MANTISSA_SIGN_ENUM __C)
15325{
15326 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
15327 (__C << 2) | __B,
0b192937 15328 _mm512_undefined_ps (),
756c5857
AI
15329 (__mmask16) -1,
15330 _MM_FROUND_CUR_DIRECTION);
15331}
15332
15333extern __inline __m512
15334__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15335_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
15336 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
15337{
15338 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
15339 (__C << 2) | __B,
15340 (__v16sf) __W, __U,
15341 _MM_FROUND_CUR_DIRECTION);
15342}
15343
15344extern __inline __m512
15345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15346_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
15347 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
15348{
15349 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
15350 (__C << 2) | __B,
15351 (__v16sf)
15352 _mm512_setzero_ps (),
15353 __U,
15354 _MM_FROUND_CUR_DIRECTION);
15355}
15356
15357#else
/* Macro forms of the 512-bit getmant intrinsics, used when __OPTIMIZE__
   is not defined.  The control immediate is formed as ((C)<<2) | (B);
   the unmasked forms pass an undefined pass-through vector and an
   all-ones mask, the maskz forms pass a zero vector.  */
#define _mm512_getmant_pd(X, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask ( \
      (__v8df)(__m512d)(X), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_ps(X, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask ( \
      (__v16sf)(__m512)(X), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))
756c5857
AI
/* Macro forms of the 512-bit getexp intrinsics, used when __OPTIMIZE__
   is not defined.  Each expands to the _mask builtin with
   _MM_FROUND_CUR_DIRECTION as the final operand.  */
#define _mm512_getexp_ps(A) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A) \
  ((__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_getexp_pd(A) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A) \
  ((__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))
15421#endif
15422
15423#ifdef __OPTIMIZE__
15424extern __inline __m512
15425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15426_mm512_roundscale_ps (__m512 __A, const int __imm)
15427{
15428 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
0b192937
UD
15429 (__v16sf)
15430 _mm512_undefined_ps (),
15431 -1,
756c5857
AI
15432 _MM_FROUND_CUR_DIRECTION);
15433}
15434
15435extern __inline __m512
15436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15437_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
15438 const int __imm)
15439{
15440 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
15441 (__v16sf) __A,
15442 (__mmask16) __B,
15443 _MM_FROUND_CUR_DIRECTION);
15444}
15445
15446extern __inline __m512
15447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15448_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
15449{
15450 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
15451 __imm,
15452 (__v16sf)
15453 _mm512_setzero_ps (),
15454 (__mmask16) __A,
15455 _MM_FROUND_CUR_DIRECTION);
15456}
15457
15458extern __inline __m512d
15459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15460_mm512_roundscale_pd (__m512d __A, const int __imm)
15461{
15462 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
0b192937
UD
15463 (__v8df)
15464 _mm512_undefined_pd (),
15465 -1,
756c5857
AI
15466 _MM_FROUND_CUR_DIRECTION);
15467}
15468
15469extern __inline __m512d
15470__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15471_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
15472 const int __imm)
15473{
15474 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
15475 (__v8df) __A,
15476 (__mmask8) __B,
15477 _MM_FROUND_CUR_DIRECTION);
15478}
15479
15480extern __inline __m512d
15481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15482_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
15483{
15484 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
15485 __imm,
15486 (__v8df)
15487 _mm512_setzero_pd (),
15488 (__mmask8) __A,
15489 _MM_FROUND_CUR_DIRECTION);
15490}
15491
15492#else
/* Macro forms of the 512-bit roundscale intrinsics, used when
   __OPTIMIZE__ is not defined.  In the mask forms A is the pass-through
   vector, B the mask and C the source; in the maskz forms A is the mask
   and B the source.  */
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf)(__m512)(A), \
      (int)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)(-1), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, D) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf)(__m512)(C), \
      (int)(D), \
      (__v16sf)(__m512)(A), \
      (__mmask16)(B), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, C) \
  ((__m512) __builtin_ia32_rndscaleps_mask ( \
      (__v16sf)(__m512)(B), \
      (int)(C), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(A), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df)(__m512d)(A), \
      (int)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)(-1), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, D) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df)(__m512d)(C), \
      (int)(D), \
      (__v8df)(__m512d)(A), \
      (__mmask8)(B), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ( \
      (__v8df)(__m512d)(B), \
      (int)(C), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(A), \
      _MM_FROUND_CUR_DIRECTION))
15519#endif
15520
15521#ifdef __OPTIMIZE__
15522extern __inline __mmask8
15523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15524_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
15525{
15526 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15527 (__v8df) __Y, __P,
15528 (__mmask8) -1,
15529 _MM_FROUND_CUR_DIRECTION);
15530}
15531
15532extern __inline __mmask16
15533__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15534_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
15535{
15536 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15537 (__v16sf) __Y, __P,
15538 (__mmask16) -1,
15539 _MM_FROUND_CUR_DIRECTION);
15540}
15541
15542extern __inline __mmask16
15543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15544_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
15545{
15546 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15547 (__v16sf) __Y, __P,
15548 (__mmask16) __U,
15549 _MM_FROUND_CUR_DIRECTION);
15550}
15551
15552extern __inline __mmask8
15553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15554_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
15555{
15556 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15557 (__v8df) __Y, __P,
15558 (__mmask8) __U,
15559 _MM_FROUND_CUR_DIRECTION);
15560}
15561
12d69dbf
JJ
15562#else
/* Macro forms of the generic 512-bit floating-point compare intrinsics,
   used when __OPTIMIZE__ is not defined.  P is the predicate immediate;
   the unmasked forms pass an all-ones input mask, the mask forms pass
   M.  */
#define _mm512_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
      (__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
      (__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), \
      (int)(P), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd512_mask ( \
      (__v8df)(__m512d)(X), \
      (__v8df)(__m512d)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask16) __builtin_ia32_cmpps512_mask ( \
      (__v16sf)(__m512)(X), \
      (__v16sf)(__m512)(Y), \
      (int)(P), \
      (__mmask16)(M), \
      _MM_FROUND_CUR_DIRECTION))
15582
12d69dbf
JJ
15583#endif
15584
7e23f4a6
OM
15585extern __inline __mmask8
15586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15587_mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
15588{
15589 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15590 (__v8df) __Y, _CMP_EQ_OQ,
15591 (__mmask8) -1,
15592 _MM_FROUND_CUR_DIRECTION);
15593}
15594
15595extern __inline __mmask8
15596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15597_mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15598{
15599 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15600 (__v8df) __Y, _CMP_EQ_OQ,
15601 (__mmask8) __U,
15602 _MM_FROUND_CUR_DIRECTION);
15603}
15604
15605extern __inline __mmask8
15606__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15607_mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
15608{
15609 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15610 (__v8df) __Y, _CMP_LT_OS,
15611 (__mmask8) -1,
15612 _MM_FROUND_CUR_DIRECTION);
15613}
15614
15615extern __inline __mmask8
15616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15617_mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15618{
15619 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15620 (__v8df) __Y, _CMP_LT_OS,
15621 (__mmask8) __U,
15622 _MM_FROUND_CUR_DIRECTION);
15623}
15624
15625extern __inline __mmask8
15626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15627_mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
15628{
15629 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15630 (__v8df) __Y, _CMP_LE_OS,
15631 (__mmask8) -1,
15632 _MM_FROUND_CUR_DIRECTION);
15633}
15634
15635extern __inline __mmask8
15636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15637_mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15638{
15639 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15640 (__v8df) __Y, _CMP_LE_OS,
15641 (__mmask8) __U,
15642 _MM_FROUND_CUR_DIRECTION);
15643}
15644
15645extern __inline __mmask8
15646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15647_mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
15648{
15649 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15650 (__v8df) __Y, _CMP_UNORD_Q,
15651 (__mmask8) -1,
15652 _MM_FROUND_CUR_DIRECTION);
15653}
15654
15655extern __inline __mmask8
15656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15657_mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15658{
15659 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15660 (__v8df) __Y, _CMP_UNORD_Q,
15661 (__mmask8) __U,
15662 _MM_FROUND_CUR_DIRECTION);
15663}
15664
15665extern __inline __mmask8
15666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15667_mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
15668{
15669 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15670 (__v8df) __Y, _CMP_NEQ_UQ,
15671 (__mmask8) -1,
15672 _MM_FROUND_CUR_DIRECTION);
15673}
15674
15675extern __inline __mmask8
15676__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15677_mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15678{
15679 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15680 (__v8df) __Y, _CMP_NEQ_UQ,
15681 (__mmask8) __U,
15682 _MM_FROUND_CUR_DIRECTION);
15683}
15684
15685extern __inline __mmask8
15686__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15687_mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
15688{
15689 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15690 (__v8df) __Y, _CMP_NLT_US,
15691 (__mmask8) -1,
15692 _MM_FROUND_CUR_DIRECTION);
15693}
15694
15695extern __inline __mmask8
15696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15697_mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15698{
15699 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15700 (__v8df) __Y, _CMP_NLT_US,
15701 (__mmask8) __U,
15702 _MM_FROUND_CUR_DIRECTION);
15703}
15704
15705extern __inline __mmask8
15706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15707_mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
15708{
15709 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15710 (__v8df) __Y, _CMP_NLE_US,
15711 (__mmask8) -1,
15712 _MM_FROUND_CUR_DIRECTION);
15713}
15714
15715extern __inline __mmask8
15716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15717_mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15718{
15719 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15720 (__v8df) __Y, _CMP_NLE_US,
15721 (__mmask8) __U,
15722 _MM_FROUND_CUR_DIRECTION);
15723}
15724
15725extern __inline __mmask8
15726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15727_mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
15728{
15729 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15730 (__v8df) __Y, _CMP_ORD_Q,
15731 (__mmask8) -1,
15732 _MM_FROUND_CUR_DIRECTION);
15733}
15734
15735extern __inline __mmask8
15736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15737_mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15738{
15739 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15740 (__v8df) __Y, _CMP_ORD_Q,
15741 (__mmask8) __U,
15742 _MM_FROUND_CUR_DIRECTION);
15743}
15744
15745extern __inline __mmask16
15746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15747_mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
15748{
15749 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15750 (__v16sf) __Y, _CMP_EQ_OQ,
15751 (__mmask16) -1,
15752 _MM_FROUND_CUR_DIRECTION);
15753}
15754
15755extern __inline __mmask16
15756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15757_mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15758{
15759 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15760 (__v16sf) __Y, _CMP_EQ_OQ,
15761 (__mmask16) __U,
15762 _MM_FROUND_CUR_DIRECTION);
15763}
15764
15765extern __inline __mmask16
15766__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15767_mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
15768{
15769 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15770 (__v16sf) __Y, _CMP_LT_OS,
15771 (__mmask16) -1,
15772 _MM_FROUND_CUR_DIRECTION);
15773}
15774
15775extern __inline __mmask16
15776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15777_mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15778{
15779 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15780 (__v16sf) __Y, _CMP_LT_OS,
15781 (__mmask16) __U,
15782 _MM_FROUND_CUR_DIRECTION);
15783}
15784
15785extern __inline __mmask16
15786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15787_mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
15788{
15789 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15790 (__v16sf) __Y, _CMP_LE_OS,
15791 (__mmask16) -1,
15792 _MM_FROUND_CUR_DIRECTION);
15793}
15794
15795extern __inline __mmask16
15796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15797_mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15798{
15799 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15800 (__v16sf) __Y, _CMP_LE_OS,
15801 (__mmask16) __U,
15802 _MM_FROUND_CUR_DIRECTION);
15803}
15804
15805extern __inline __mmask16
15806__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15807_mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
15808{
15809 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15810 (__v16sf) __Y, _CMP_UNORD_Q,
15811 (__mmask16) -1,
15812 _MM_FROUND_CUR_DIRECTION);
15813}
15814
15815extern __inline __mmask16
15816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15817_mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15818{
15819 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15820 (__v16sf) __Y, _CMP_UNORD_Q,
15821 (__mmask16) __U,
15822 _MM_FROUND_CUR_DIRECTION);
15823}
15824
15825extern __inline __mmask16
15826__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15827_mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
15828{
15829 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15830 (__v16sf) __Y, _CMP_NEQ_UQ,
15831 (__mmask16) -1,
15832 _MM_FROUND_CUR_DIRECTION);
15833}
15834
15835extern __inline __mmask16
15836__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15837_mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15838{
15839 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15840 (__v16sf) __Y, _CMP_NEQ_UQ,
15841 (__mmask16) __U,
15842 _MM_FROUND_CUR_DIRECTION);
15843}
15844
15845extern __inline __mmask16
15846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15847_mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
15848{
15849 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15850 (__v16sf) __Y, _CMP_NLT_US,
15851 (__mmask16) -1,
15852 _MM_FROUND_CUR_DIRECTION);
15853}
15854
15855extern __inline __mmask16
15856__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15857_mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15858{
15859 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15860 (__v16sf) __Y, _CMP_NLT_US,
15861 (__mmask16) __U,
15862 _MM_FROUND_CUR_DIRECTION);
15863}
15864
15865extern __inline __mmask16
15866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15867_mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
15868{
15869 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15870 (__v16sf) __Y, _CMP_NLE_US,
15871 (__mmask16) -1,
15872 _MM_FROUND_CUR_DIRECTION);
15873}
15874
15875extern __inline __mmask16
15876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15877_mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15878{
15879 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15880 (__v16sf) __Y, _CMP_NLE_US,
15881 (__mmask16) __U,
15882 _MM_FROUND_CUR_DIRECTION);
15883}
15884
15885extern __inline __mmask16
15886__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15887_mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
15888{
15889 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15890 (__v16sf) __Y, _CMP_ORD_Q,
15891 (__mmask16) -1,
15892 _MM_FROUND_CUR_DIRECTION);
15893}
15894
15895extern __inline __mmask16
15896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15897_mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15898{
15899 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15900 (__v16sf) __Y, _CMP_ORD_Q,
15901 (__mmask16) __U,
15902 _MM_FROUND_CUR_DIRECTION);
15903}
15904
2196a885
KY
15905extern __inline __mmask16
15906__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15907_mm512_kmov (__mmask16 __A)
15908{
7cdb6e4c 15909 return __builtin_ia32_kmovw (__A);
2196a885
KY
15910}
15911
275be1da
IT
15912extern __inline __m512
15913__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15914_mm512_castpd_ps (__m512d __A)
15915{
15916 return (__m512) (__A);
15917}
15918
15919extern __inline __m512i
15920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15921_mm512_castpd_si512 (__m512d __A)
15922{
15923 return (__m512i) (__A);
15924}
15925
15926extern __inline __m512d
15927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15928_mm512_castps_pd (__m512 __A)
15929{
15930 return (__m512d) (__A);
15931}
15932
15933extern __inline __m512i
15934__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15935_mm512_castps_si512 (__m512 __A)
15936{
15937 return (__m512i) (__A);
15938}
15939
15940extern __inline __m512
15941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15942_mm512_castsi512_ps (__m512i __A)
15943{
15944 return (__m512) (__A);
15945}
15946
15947extern __inline __m512d
15948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15949_mm512_castsi512_pd (__m512i __A)
15950{
15951 return (__m512d) (__A);
15952}
15953
15954extern __inline __m128d
15955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15956_mm512_castpd512_pd128 (__m512d __A)
15957{
15958 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
15959}
15960
15961extern __inline __m128
15962__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15963_mm512_castps512_ps128 (__m512 __A)
15964{
15965 return _mm512_extractf32x4_ps(__A, 0);
15966}
15967
15968extern __inline __m128i
15969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15970_mm512_castsi512_si128 (__m512i __A)
15971{
15972 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
15973}
15974
15975extern __inline __m256d
15976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15977_mm512_castpd512_pd256 (__m512d __A)
15978{
15979 return _mm512_extractf64x4_pd(__A, 0);
15980}
15981
15982extern __inline __m256
15983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15984_mm512_castps512_ps256 (__m512 __A)
15985{
15986 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
15987}
15988
15989extern __inline __m256i
15990__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15991_mm512_castsi512_si256 (__m512i __A)
15992{
15993 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
15994}
15995
15996extern __inline __m512d
15997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15998_mm512_castpd128_pd512 (__m128d __A)
15999{
16000 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
16001}
16002
16003extern __inline __m512
16004__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16005_mm512_castps128_ps512 (__m128 __A)
16006{
16007 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
16008}
16009
16010extern __inline __m512i
16011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16012_mm512_castsi128_si512 (__m128i __A)
16013{
16014 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
16015}
16016
16017extern __inline __m512d
16018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16019_mm512_castpd256_pd512 (__m256d __A)
16020{
16021 return __builtin_ia32_pd512_256pd (__A);
16022}
16023
16024extern __inline __m512
16025__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16026_mm512_castps256_ps512 (__m256 __A)
16027{
16028 return __builtin_ia32_ps512_256ps (__A);
16029}
16030
16031extern __inline __m512i
16032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16033_mm512_castsi256_si512 (__m256i __A)
16034{
16035 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
16036}
16037
e6b2dc24
JJ
16038extern __inline __m512d
16039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16040_mm512_zextpd128_pd512 (__m128d __A)
16041{
16042 return (__m512d) _mm512_insertf32x4 (_mm512_setzero_ps (), (__m128) __A, 0);
16043}
16044
16045extern __inline __m512
16046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16047_mm512_zextps128_ps512 (__m128 __A)
16048{
16049 return _mm512_insertf32x4 (_mm512_setzero_ps (), __A, 0);
16050}
16051
16052extern __inline __m512i
16053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16054_mm512_zextsi128_si512 (__m128i __A)
16055{
16056 return _mm512_inserti32x4 (_mm512_setzero_si512 (), __A, 0);
16057}
16058
16059extern __inline __m512d
16060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16061_mm512_zextpd256_pd512 (__m256d __A)
16062{
16063 return _mm512_insertf64x4 (_mm512_setzero_pd (), __A, 0);
16064}
16065
16066extern __inline __m512
16067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16068_mm512_zextps256_ps512 (__m256 __A)
16069{
16070 return (__m512) _mm512_insertf64x4 (_mm512_setzero_pd (), (__m256d) __A, 0);
16071}
16072
16073extern __inline __m512i
16074__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16075_mm512_zextsi256_si512 (__m256i __A)
16076{
16077 return _mm512_inserti64x4 (_mm512_setzero_si512 (), __A, 0);
16078}
16079
275be1da
IT
16080extern __inline __mmask16
16081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16082_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
16083{
16084 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16085 (__v16si) __B, 0,
16086 (__mmask16) -1);
16087}
16088
16089extern __inline __mmask16
16090__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16091_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
16092{
16093 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16094 (__v16si) __B, 0, __U);
16095}
16096
16097extern __inline __mmask8
16098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16099_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
16100{
16101 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16102 (__v8di) __B, 0, __U);
16103}
16104
16105extern __inline __mmask8
16106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16107_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
16108{
16109 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16110 (__v8di) __B, 0,
16111 (__mmask8) -1);
16112}
16113
16114extern __inline __mmask16
16115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16116_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
16117{
16118 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16119 (__v16si) __B, 6,
16120 (__mmask16) -1);
16121}
16122
16123extern __inline __mmask16
16124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16125_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
16126{
16127 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16128 (__v16si) __B, 6, __U);
16129}
16130
16131extern __inline __mmask8
16132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16133_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
16134{
16135 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16136 (__v8di) __B, 6, __U);
16137}
16138
16139extern __inline __mmask8
16140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16141_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
16142{
16143 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16144 (__v8di) __B, 6,
16145 (__mmask8) -1);
16146}
16147
167a5b77
JJ
/* Reduction body for the 32-bit integer reducers that use a C binary
   operator (+, *, &, |).  Must be expanded inside a function that has
   a __m512i named __A in scope; the expansion ends in a `return'
   without a trailing semicolon.  The vector is folded 512 -> 256 ->
   128 bits with OP, the two 64-bit halves of the 128-bit result are
   combined after a shuffle, and the last two elements are combined as
   scalars.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v8si __Hi8 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
  __v8si __Lo8 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __Fold8 = (__m256i) (__Hi8 op __Lo8); \
  __v4si __Hi4 = (__v4si) _mm256_extracti128_si256 (__Fold8, 1); \
  __v4si __Lo4 = (__v4si) _mm256_extracti128_si256 (__Fold8, 0); \
  __v4si __Fold4 = __Hi4 op __Lo4; \
  __v4si __Swap2 = __builtin_shuffle (__Fold4, (__v4si) { 2, 3, 0, 1 }); \
  __v4si __Fold2 = __Fold4 op __Swap2; \
  return __Fold2[0] op __Fold2[1]
16159
/* Reduce all elements of __A to a single int with `+'.  The body is
   entirely the expansion of __MM512_REDUCE_OP, which declares its own
   temporaries, reads __A and ends in the return statement.  */
16160extern __inline int
16161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16162_mm512_reduce_add_epi32 (__m512i __A)
16163{
16164 __MM512_REDUCE_OP (+);
16165}
16166
/* Reduce all elements of __A to a single int with `*'.  The body is
   entirely the expansion of __MM512_REDUCE_OP, which declares its own
   temporaries, reads __A and ends in the return statement.  */
16167extern __inline int
16168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16169_mm512_reduce_mul_epi32 (__m512i __A)
16170{
16171 __MM512_REDUCE_OP (*);
16172}
16173
/* Reduce all elements of __A to a single int with bitwise `&'.  The
   body is entirely the expansion of __MM512_REDUCE_OP, which declares
   its own temporaries, reads __A and ends in the return statement.  */
16174extern __inline int
16175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16176_mm512_reduce_and_epi32 (__m512i __A)
16177{
16178 __MM512_REDUCE_OP (&);
16179}
16180
/* Reduce all elements of __A to a single int with bitwise `|'.  The
   body is entirely the expansion of __MM512_REDUCE_OP, which declares
   its own temporaries, reads __A and ends in the return statement.  */
16181extern __inline int
16182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16183_mm512_reduce_or_epi32 (__m512i __A)
16184{
16185 __MM512_REDUCE_OP (|);
16186}
16187
/* Masked `+' reduction.  __A is first replaced by the result of
   _mm512_maskz_mov_epi32 under __U (presumably zeroing the elements
   whose mask bit is clear so they cannot change the sum -- confirm
   against that intrinsic), then folded by __MM512_REDUCE_OP, which
   also supplies the return statement.  */
16188extern __inline int
16189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16190_mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
16191{
16192 __A = _mm512_maskz_mov_epi32 (__U, __A);
16193 __MM512_REDUCE_OP (+);
16194}
16195
/* Masked `*' reduction.  __A is first merged under __U into a vector
   of all 1s via _mm512_mask_mov_epi32 (presumably leaving 1, the
   multiplicative identity, in unselected elements -- confirm against
   that intrinsic), then folded by __MM512_REDUCE_OP, which also
   supplies the return statement.  */
16196extern __inline int
16197__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16198_mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
16199{
16200 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
16201 __MM512_REDUCE_OP (*);
16202}
16203
/* Masked `&' reduction.  __A is first merged under __U into a vector
   whose elements are ~0 (all bits set) via _mm512_mask_mov_epi32
   (presumably leaving all-ones, the identity for `&', in unselected
   elements -- confirm against that intrinsic), then folded by
   __MM512_REDUCE_OP, which also supplies the return statement.  */
16204extern __inline int
16205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16206_mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
16207{
16208 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
16209 __MM512_REDUCE_OP (&);
16210}
16211
/* Masked `|' reduction.  __A is first replaced by the result of
   _mm512_maskz_mov_epi32 under __U (presumably zeroing the elements
   whose mask bit is clear so they cannot set bits in the result --
   confirm against that intrinsic), then folded by __MM512_REDUCE_OP,
   which also supplies the return statement.  */
16212extern __inline int
16213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16214_mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
16215{
16216 __A = _mm512_maskz_mov_epi32 (__U, __A);
16217 __MM512_REDUCE_OP (|);
16218}
16219
/* Reduction body for the 32-bit integer min/max reducers.  OP is an
   intrinsic suffix such as min_epi32; it is pasted onto _mm256_ and
   _mm_ to pick the combining intrinsic at each width.  Must be
   expanded inside a function that has a __m512i named __A in scope;
   the expansion ends in a `return' without a trailing semicolon.  The
   vector is folded 512 -> 256 -> 128 bits, then twice more against
   shuffled copies of itself, and element 0 is returned.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256i __Hi8 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
  __m256i __Lo8 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __Fold8 = _mm256_##op (__Hi8, __Lo8); \
  __m128i __Hi4 = (__m128i) _mm256_extracti128_si256 (__Fold8, 1); \
  __m128i __Lo4 = (__m128i) _mm256_extracti128_si256 (__Fold8, 0); \
  __m128i __Fold4 = _mm_##op (__Hi4, __Lo4); \
  __m128i __Swap2 = (__m128i) __builtin_shuffle ((__v4si) __Fold4, \
						 (__v4si) { 2, 3, 0, 1 }); \
  __m128i __Fold2 = _mm_##op (__Fold4, __Swap2); \
  __m128i __Swap1 = (__m128i) __builtin_shuffle ((__v4si) __Fold2, \
						 (__v4si) { 1, 0, 1, 0 }); \
  __v4si __Fold1 = (__v4si) _mm_##op (__Fold2, __Swap1); \
  return __Fold1[0]
16235
/* Reduce all elements of __A to a single int by expanding
   __MM512_REDUCE_OP with `min_epi32', i.e. folding with
   _mm256_min_epi32 and _mm_min_epi32.  The macro declares its own
   temporaries and ends in the return statement.  */
16236extern __inline int
16237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16238_mm512_reduce_min_epi32 (__m512i __A)
16239{
16240 __MM512_REDUCE_OP (min_epi32);
16241}
16242
/* Reduce all elements of __A to a single int by expanding
   __MM512_REDUCE_OP with `max_epi32', i.e. folding with
   _mm256_max_epi32 and _mm_max_epi32.  The macro declares its own
   temporaries and ends in the return statement.  */
16243extern __inline int
16244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16245_mm512_reduce_max_epi32 (__m512i __A)
16246{
16247 __MM512_REDUCE_OP (max_epi32);
16248}
16249
/* Reduce all elements of __A to a single unsigned int by expanding
   __MM512_REDUCE_OP with `min_epu32', i.e. folding with
   _mm256_min_epu32 and _mm_min_epu32.  The macro declares its own
   temporaries and ends in the return statement.  */
16250extern __inline unsigned int
16251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16252_mm512_reduce_min_epu32 (__m512i __A)
16253{
16254 __MM512_REDUCE_OP (min_epu32);
16255}
16256
/* Reduce all elements of __A to a single unsigned int by expanding
   __MM512_REDUCE_OP with `max_epu32', i.e. folding with
   _mm256_max_epu32 and _mm_max_epu32.  The macro declares its own
   temporaries and ends in the return statement.  */
16257extern __inline unsigned int
16258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16259_mm512_reduce_max_epu32 (__m512i __A)
16260{
16261 __MM512_REDUCE_OP (max_epu32);
16262}
16263
/* Masked signed-minimum reduction.  __A is first merged under __U into
   a vector filled with __INT_MAX__ via _mm512_mask_mov_epi32
   (presumably so unselected elements can never be the minimum --
   confirm against that intrinsic), then folded by __MM512_REDUCE_OP
   with `min_epi32', which also supplies the return statement.  */
16264extern __inline int
16265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16266_mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
16267{
16268 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
16269 __MM512_REDUCE_OP (min_epi32);
16270}
16271
/* Masked signed-maximum reduction.  __A is first merged under __U into
   a vector filled with -__INT_MAX__ - 1 (the most negative int) via
   _mm512_mask_mov_epi32 (presumably so unselected elements can never
   be the maximum -- confirm against that intrinsic), then folded by
   __MM512_REDUCE_OP with `max_epi32', which also supplies the return
   statement.  */
16272extern __inline int
16273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16274_mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
16275{
16276 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
16277 __MM512_REDUCE_OP (max_epi32);
16278}
16279
/* Masked unsigned-minimum reduction.  __A is first merged under __U
   into a vector whose elements are ~0 (all bits set, the largest
   unsigned value) via _mm512_mask_mov_epi32 (presumably so unselected
   elements can never be the minimum -- confirm against that
   intrinsic), then folded by __MM512_REDUCE_OP with `min_epu32', which
   also supplies the return statement.  */
16280extern __inline unsigned int
16281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16282_mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
16283{
16284 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
16285 __MM512_REDUCE_OP (min_epu32);
16286}
16287
/* Masked unsigned-maximum reduction.  __A is first replaced by the
   result of _mm512_maskz_mov_epi32 under __U (presumably zeroing
   unselected elements so they can never exceed a selected one --
   confirm against that intrinsic), then folded by __MM512_REDUCE_OP
   with `max_epu32', which also supplies the return statement.  */
16288extern __inline unsigned int
16289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16290_mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
16291{
16292 __A = _mm512_maskz_mov_epi32 (__U, __A);
16293 __MM512_REDUCE_OP (max_epu32);
16294}
16295
/* Reduction body for the single-precision reducers that use a C binary
   operator (+, *).  Must be expanded inside a function that has a
   __m512 named __A in scope; the expansion ends in a `return' without
   a trailing semicolon.  __A is viewed as __m512d only to reuse the
   64x4 extract.  The vector is folded 512 -> 256 -> 128 bits with OP,
   the two 64-bit halves of the 128-bit result are combined after a
   shuffle, and the last two elements are combined as scalars.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __Hi8 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __Lo8 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __Fold8 = __Hi8 op __Lo8; \
  __m128 __Hi4 = _mm256_extractf128_ps (__Fold8, 1); \
  __m128 __Lo4 = _mm256_extractf128_ps (__Fold8, 0); \
  __m128 __Fold4 = __Hi4 op __Lo4; \
  __m128 __Swap2 = __builtin_shuffle (__Fold4, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __Fold2 = __Fold4 op __Swap2; \
  return __Fold2[0] op __Fold2[1]
16307
/* Reduce all elements of __A to a single float with `+'.  The body is
   entirely the expansion of __MM512_REDUCE_OP, which declares its own
   temporaries, reads __A and ends in the return statement.  The
   additions happen in the macro's pairwise folding order, not left to
   right.  */
16308extern __inline float
16309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16310_mm512_reduce_add_ps (__m512 __A)
16311{
16312 __MM512_REDUCE_OP (+);
16313}
16314
/* Reduce all elements of __A to a single float with `*'.  The body is
   entirely the expansion of __MM512_REDUCE_OP, which declares its own
   temporaries, reads __A and ends in the return statement.  The
   multiplications happen in the macro's pairwise folding order, not
   left to right.  */
16315extern __inline float
16316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16317_mm512_reduce_mul_ps (__m512 __A)
16318{
16319 __MM512_REDUCE_OP (*);
16320}
16321
/* Masked `+' reduction.  __A is first replaced by the result of
   _mm512_maskz_mov_ps under __U (presumably zeroing the elements whose
   mask bit is clear -- confirm against that intrinsic), then folded by
   __MM512_REDUCE_OP, which also supplies the return statement.  */
16322extern __inline float
16323__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16324_mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
16325{
16326 __A = _mm512_maskz_mov_ps (__U, __A);
16327 __MM512_REDUCE_OP (+);
16328}
16329
/* Masked `*' reduction.  __A is first merged under __U into a vector
   of 1.0f via _mm512_mask_mov_ps (presumably leaving 1.0f, the
   multiplicative identity, in unselected elements -- confirm against
   that intrinsic), then folded by __MM512_REDUCE_OP, which also
   supplies the return statement.  */
16330extern __inline float
16331__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16332_mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
16333{
16334 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
16335 __MM512_REDUCE_OP (*);
16336}
16337
/* Reduction body for the single-precision min/max reducers.  OP is an
   intrinsic suffix such as min_ps; it is pasted onto _mm256_ and _mm_
   to pick the combining intrinsic at each width.  Must be expanded
   inside a function that has a __m512 named __A in scope; the
   expansion ends in a `return' without a trailing semicolon.  The
   vector is folded 512 -> 256 -> 128 bits, then twice more against
   shuffled copies of itself, and element 0 is returned.  Operand order
   of every combining call is significant and must be kept.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256 __Hi8 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
  __m256 __Lo8 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
  __m256 __Fold8 = _mm256_##op (__Hi8, __Lo8); \
  __m128 __Hi4 = _mm256_extractf128_ps (__Fold8, 1); \
  __m128 __Lo4 = _mm256_extractf128_ps (__Fold8, 0); \
  __m128 __Fold4 = _mm_##op (__Hi4, __Lo4); \
  __m128 __Swap2 = __builtin_shuffle (__Fold4, (__v4si) { 2, 3, 0, 1 }); \
  __m128 __Fold2 = _mm_##op (__Fold4, __Swap2); \
  __m128 __Swap1 = __builtin_shuffle (__Fold2, (__v4si) { 1, 0, 1, 0 }); \
  __m128 __Fold1 = _mm_##op (__Fold2, __Swap1); \
  return __Fold1[0]
16351
/* Reduce all elements of __A to a single float by expanding
   __MM512_REDUCE_OP with `min_ps', i.e. folding with _mm256_min_ps and
   _mm_min_ps.  The macro declares its own temporaries and ends in the
   return statement.  */
16352extern __inline float
16353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16354_mm512_reduce_min_ps (__m512 __A)
16355{
16356 __MM512_REDUCE_OP (min_ps);
16357}
16358
/* Reduce all elements of __A to a single float by expanding
   __MM512_REDUCE_OP with `max_ps', i.e. folding with _mm256_max_ps and
   _mm_max_ps.  The macro declares its own temporaries and ends in the
   return statement.  */
16359extern __inline float
16360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16361_mm512_reduce_max_ps (__m512 __A)
16362{
16363 __MM512_REDUCE_OP (max_ps);
16364}
16365
/* Masked minimum reduction.  __A is first merged under __U into a
   vector filled with +infinity (__builtin_inff) via _mm512_mask_mov_ps
   (presumably so unselected elements can never be the minimum --
   confirm against that intrinsic), then folded by __MM512_REDUCE_OP
   with `min_ps', which also supplies the return statement.  */
16366extern __inline float
16367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16368_mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
16369{
16370 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
16371 __MM512_REDUCE_OP (min_ps);
16372}
16373
/* Masked maximum reduction.  __A is first merged under __U into a
   vector filled with -infinity (-__builtin_inff) via
   _mm512_mask_mov_ps (presumably so unselected elements can never be
   the maximum -- confirm against that intrinsic), then folded by
   __MM512_REDUCE_OP with `max_ps', which also supplies the return
   statement.  */
16374extern __inline float
16375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16376_mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
16377{
16378 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
16379 __MM512_REDUCE_OP (max_ps);
16380}
16381
/* Reduction body for the 64-bit integer reducers that use a C binary
   operator (+, *, &, |).  Must be expanded inside a function that has
   a __m512i named __A in scope; the expansion ends in a `return'
   without a trailing semicolon.  The vector is folded 512 -> 256 ->
   128 bits with OP and the last two elements are combined as
   scalars.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __v4di __Hi4 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
  __v4di __Lo4 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
  __m256i __Fold4 = (__m256i) (__Hi4 op __Lo4); \
  __v2di __Hi2 = (__v2di) _mm256_extracti128_si256 (__Fold4, 1); \
  __v2di __Lo2 = (__v2di) _mm256_extracti128_si256 (__Fold4, 0); \
  __v2di __Fold2 = __Hi2 op __Lo2; \
  return __Fold2[0] op __Fold2[1]
16391
/* Reduce all elements of __A to a single long long with `+'.  The body
   is entirely the expansion of __MM512_REDUCE_OP, which declares its
   own temporaries, reads __A and ends in the return statement.  */
16392extern __inline long long
16393__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16394_mm512_reduce_add_epi64 (__m512i __A)
16395{
16396 __MM512_REDUCE_OP (+);
16397}
16398
/* Reduce all elements of __A to a single long long with `*'.  The body
   is entirely the expansion of __MM512_REDUCE_OP, which declares its
   own temporaries, reads __A and ends in the return statement.  */
16399extern __inline long long
16400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16401_mm512_reduce_mul_epi64 (__m512i __A)
16402{
16403 __MM512_REDUCE_OP (*);
16404}
16405
/* Reduce all elements of __A to a single long long with bitwise `&'.
   The body is entirely the expansion of __MM512_REDUCE_OP, which
   declares its own temporaries, reads __A and ends in the return
   statement.  */
16406extern __inline long long
16407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16408_mm512_reduce_and_epi64 (__m512i __A)
16409{
16410 __MM512_REDUCE_OP (&);
16411}
16412
/* Reduce all elements of __A to a single long long with bitwise `|'.
   The body is entirely the expansion of __MM512_REDUCE_OP, which
   declares its own temporaries, reads __A and ends in the return
   statement.  */
16413extern __inline long long
16414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16415_mm512_reduce_or_epi64 (__m512i __A)
16416{
16417 __MM512_REDUCE_OP (|);
16418}
16419
/* Masked `+' reduction.  __A is first replaced by the result of
   _mm512_maskz_mov_epi64 under __U (presumably zeroing the elements
   whose mask bit is clear so they cannot change the sum -- confirm
   against that intrinsic), then folded by __MM512_REDUCE_OP, which
   also supplies the return statement.  */
16420extern __inline long long
16421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16422_mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
16423{
16424 __A = _mm512_maskz_mov_epi64 (__U, __A);
16425 __MM512_REDUCE_OP (+);
16426}
16427
/* Masked `*' reduction.  __A is first merged under __U into a vector
   of 1LL via _mm512_mask_mov_epi64 (presumably leaving 1, the
   multiplicative identity, in unselected elements -- confirm against
   that intrinsic), then folded by __MM512_REDUCE_OP, which also
   supplies the return statement.  */
16428extern __inline long long
16429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16430_mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
16431{
16432 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
16433 __MM512_REDUCE_OP (*);
16434}
16435
/* Masked `&' reduction.  __A is first merged under __U into a vector
   whose elements are ~0LL (all bits set) via _mm512_mask_mov_epi64
   (presumably leaving all-ones, the identity for `&', in unselected
   elements -- confirm against that intrinsic), then folded by
   __MM512_REDUCE_OP, which also supplies the return statement.  */
16436extern __inline long long
16437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16438_mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
16439{
16440 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16441 __MM512_REDUCE_OP (&);
16442}
16443
/* Masked `|' reduction.  __A is first replaced by the result of
   _mm512_maskz_mov_epi64 under __U (presumably zeroing the elements
   whose mask bit is clear so they cannot set bits in the result --
   confirm against that intrinsic), then folded by __MM512_REDUCE_OP,
   which also supplies the return statement.  */
16444extern __inline long long
16445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16446_mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
16447{
16448 __A = _mm512_maskz_mov_epi64 (__U, __A);
16449 __MM512_REDUCE_OP (|);
16450}
16451
/* Reduction body for the 64-bit integer min/max reducers.  OP is an
   intrinsic suffix such as min_epi64; it is pasted onto _mm512_ so
   every step works on full 512-bit vectors.  Must be expanded inside a
   function that has a __m512i named __A in scope; the expansion ends
   in a `return' without a trailing semicolon.  __A is combined with
   three successively shuffled copies (the 0x4e immediate presumably
   swaps the 256-bit halves -- confirm against _mm512_shuffle_i64x2)
   and element 0 of the final vector is returned.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m512i __Swap4 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
  __m512i __Fold4 = _mm512_##op (__A, __Swap4); \
  __m512i __Swap2 \
    = (__m512i) __builtin_shuffle ((__v8di) __Fold4, \
				   (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
  __m512i __Fold2 = _mm512_##op (__Fold4, __Swap2); \
  __m512i __Swap1 \
    = (__m512i) __builtin_shuffle ((__v8di) __Fold2, \
				   (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 }); \
  __v8di __Fold1 = (__v8di) _mm512_##op (__Fold2, __Swap1); \
  return __Fold1[0]
16465
/* Reduce all elements of __A to a single long long by expanding
   __MM512_REDUCE_OP with `min_epi64', i.e. folding with
   _mm512_min_epi64.  The macro declares its own temporaries and ends
   in the return statement.  */
16466extern __inline long long
16467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16468_mm512_reduce_min_epi64 (__m512i __A)
16469{
16470 __MM512_REDUCE_OP (min_epi64);
16471}
16472
/* Reduce all elements of __A to a single long long by expanding
   __MM512_REDUCE_OP with `max_epi64', i.e. folding with
   _mm512_max_epi64.  The macro declares its own temporaries and ends
   in the return statement.  */
16473extern __inline long long
16474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16475_mm512_reduce_max_epi64 (__m512i __A)
16476{
16477 __MM512_REDUCE_OP (max_epi64);
16478}
16479
/* Masked signed-minimum reduction.  __A is first merged under __U into
   a vector filled with __LONG_LONG_MAX__ via _mm512_mask_mov_epi64
   (presumably so unselected elements can never be the minimum --
   confirm against that intrinsic), then folded by __MM512_REDUCE_OP
   with `min_epi64', which also supplies the return statement.  */
16480extern __inline long long
16481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16482_mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
16483{
16484 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
16485 __U, __A);
16486 __MM512_REDUCE_OP (min_epi64);
16487}
16488
/* Masked signed-maximum reduction.  __A is first merged under __U into
   a vector filled with -__LONG_LONG_MAX__ - 1 (the most negative long
   long) via _mm512_mask_mov_epi64 (presumably so unselected elements
   can never be the maximum -- confirm against that intrinsic), then
   folded by __MM512_REDUCE_OP with `max_epi64', which also supplies
   the return statement.  */
16489extern __inline long long
16490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16491_mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
16492{
16493 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
16494 __U, __A);
16495 __MM512_REDUCE_OP (max_epi64);
16496}
16497
/* Reduce all elements of __A to a single unsigned long long by
   expanding __MM512_REDUCE_OP with `min_epu64', i.e. folding with
   _mm512_min_epu64.  The macro declares its own temporaries and ends
   in the return statement.  */
16498extern __inline unsigned long long
16499__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16500_mm512_reduce_min_epu64 (__m512i __A)
16501{
16502 __MM512_REDUCE_OP (min_epu64);
16503}
16504
/* Reduce all elements of __A to a single unsigned long long by
   expanding __MM512_REDUCE_OP with `max_epu64', i.e. folding with
   _mm512_max_epu64.  The macro declares its own temporaries and ends
   in the return statement.  */
16505extern __inline unsigned long long
16506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16507_mm512_reduce_max_epu64 (__m512i __A)
16508{
16509 __MM512_REDUCE_OP (max_epu64);
16510}
16511
/* Masked unsigned-minimum reduction.  __A is first merged under __U
   into a vector whose elements are ~0LL (all bits set, the largest
   unsigned value) via _mm512_mask_mov_epi64 (presumably so unselected
   elements can never be the minimum -- confirm against that
   intrinsic), then folded by __MM512_REDUCE_OP with `min_epu64', which
   also supplies the return statement.  */
16512extern __inline unsigned long long
16513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16514_mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
16515{
16516 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16517 __MM512_REDUCE_OP (min_epu64);
16518}
16519
/* Masked unsigned-maximum reduction.  __A is first replaced by the
   result of _mm512_maskz_mov_epi64 under __U (presumably zeroing
   unselected elements so they can never exceed a selected one --
   confirm against that intrinsic), then folded by __MM512_REDUCE_OP
   with `max_epu64', which also supplies the return statement.  */
16520extern __inline unsigned long long
16521__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16522_mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
16523{
16524 __A = _mm512_maskz_mov_epi64 (__U, __A);
16525 __MM512_REDUCE_OP (max_epu64);
16526}
16527
/* Reduction body for the double-precision reducers that use a C binary
   operator (+, *).  Must be expanded inside a function that has a
   __m512d named __A in scope; the expansion ends in a `return' without
   a trailing semicolon.  The vector is folded 512 -> 256 -> 128 bits
   with OP and the last two elements are combined as scalars.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __Hi4 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __Lo4 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __Fold4 = __Hi4 op __Lo4; \
  __m128d __Hi2 = _mm256_extractf128_pd (__Fold4, 1); \
  __m128d __Lo2 = _mm256_extractf128_pd (__Fold4, 0); \
  __m128d __Fold2 = __Hi2 op __Lo2; \
  return __Fold2[0] op __Fold2[1]
16537
/* Reduce all elements of __A to a single double with `+'.  The body is
   entirely the expansion of __MM512_REDUCE_OP, which declares its own
   temporaries, reads __A and ends in the return statement.  The
   additions happen in the macro's pairwise folding order, not left to
   right.  */
16538extern __inline double
16539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16540_mm512_reduce_add_pd (__m512d __A)
16541{
16542 __MM512_REDUCE_OP (+);
16543}
16544
/* Reduce all elements of __A to a single double with `*'.  The body is
   entirely the expansion of __MM512_REDUCE_OP, which declares its own
   temporaries, reads __A and ends in the return statement.  The
   multiplications happen in the macro's pairwise folding order, not
   left to right.  */
16545extern __inline double
16546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16547_mm512_reduce_mul_pd (__m512d __A)
16548{
16549 __MM512_REDUCE_OP (*);
16550}
16551
/* Masked `+' reduction.  __A is first replaced by the result of
   _mm512_maskz_mov_pd under __U (presumably zeroing the elements whose
   mask bit is clear -- confirm against that intrinsic), then folded by
   __MM512_REDUCE_OP, which also supplies the return statement.  */
16552extern __inline double
16553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16554_mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
16555{
16556 __A = _mm512_maskz_mov_pd (__U, __A);
16557 __MM512_REDUCE_OP (+);
16558}
16559
/* Masked `*' reduction.  __A is first merged under __U into a vector
   of 1.0 via _mm512_mask_mov_pd (presumably leaving 1.0, the
   multiplicative identity, in unselected elements -- confirm against
   that intrinsic), then folded by __MM512_REDUCE_OP, which also
   supplies the return statement.  */
16560extern __inline double
16561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16562_mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
16563{
16564 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
16565 __MM512_REDUCE_OP (*);
16566}
16567
/* Reduction body for the double-precision min/max reducers.  OP is an
   intrinsic suffix such as min_pd; it is pasted onto _mm256_ and _mm_
   to pick the combining intrinsic at each width.  Must be expanded
   inside a function that has a __m512d named __A in scope; the
   expansion ends in a `return' without a trailing semicolon.  The
   vector is folded 512 -> 256 -> 128 bits, once more against a copy
   with its two elements swapped, and element 0 is returned.  Operand
   order of every combining call is significant and must be kept.  */
#undef __MM512_REDUCE_OP
#define __MM512_REDUCE_OP(op) \
  __m256d __Hi4 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
  __m256d __Lo4 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
  __m256d __Fold4 = _mm256_##op (__Hi4, __Lo4); \
  __m128d __Hi2 = _mm256_extractf128_pd (__Fold4, 1); \
  __m128d __Lo2 = _mm256_extractf128_pd (__Fold4, 0); \
  __m128d __Fold2 = _mm_##op (__Hi2, __Lo2); \
  __m128d __Swap1 = (__m128d) __builtin_shuffle (__Fold2, (__v2di) { 1, 0 }); \
  __m128d __Fold1 = _mm_##op (__Fold2, __Swap1); \
  return __Fold1[0]
16579
/* Reduce all elements of __A to a single double by expanding
   __MM512_REDUCE_OP with `min_pd', i.e. folding with _mm256_min_pd and
   _mm_min_pd.  The macro declares its own temporaries and ends in the
   return statement.  */
16580extern __inline double
16581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16582_mm512_reduce_min_pd (__m512d __A)
16583{
16584 __MM512_REDUCE_OP (min_pd);
16585}
16586
/* Reduce all elements of __A to a single double by expanding
   __MM512_REDUCE_OP with `max_pd', i.e. folding with _mm256_max_pd and
   _mm_max_pd.  The macro declares its own temporaries and ends in the
   return statement.  */
16587extern __inline double
16588__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16589_mm512_reduce_max_pd (__m512d __A)
16590{
16591 __MM512_REDUCE_OP (max_pd);
16592}
16593
/* Masked minimum reduction.  __A is first merged under __U into a
   vector filled with +infinity (__builtin_inf) via _mm512_mask_mov_pd
   (presumably so unselected elements can never be the minimum --
   confirm against that intrinsic), then folded by __MM512_REDUCE_OP
   with `min_pd', which also supplies the return statement.  */
16594extern __inline double
16595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16596_mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
16597{
16598 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
16599 __MM512_REDUCE_OP (min_pd);
16600}
16601
/* Masked maximum reduction.  __A is first merged under __U into a
   vector filled with -infinity (-__builtin_inf) via _mm512_mask_mov_pd
   (presumably so unselected elements can never be the maximum --
   confirm against that intrinsic), then folded by __MM512_REDUCE_OP
   with `max_pd', which also supplies the return statement.  */
16602extern __inline double
16603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16604_mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
16605{
16606 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
16607 __MM512_REDUCE_OP (max_pd);
16608}
16609
/* The reduction helper is private to this header: drop the last
   definition so the macro name is not visible to code that includes
   it.  */
16610#undef __MM512_REDUCE_OP
16611
79fb4764
HJ
/* If this header set __DISABLE_AVX512F_512__, clear the marker and pop
   the saved target options.  NOTE(review): the matching
   `#pragma GCC push_options' and the #define of this macro are not in
   the visible part of the file -- confirm they are paired.  */
16612#ifdef __DISABLE_AVX512F_512__
16613#undef __DISABLE_AVX512F_512__
756c5857 16614#pragma GCC pop_options
79fb4764 16615#endif /* __DISABLE_AVX512F_512__ */
756c5857
AI
16616
/* Closes the _AVX512FINTRIN_H_INCLUDED include guard opened at the top
   of the header.  */
16617#endif /* _AVX512FINTRIN_H_INCLUDED */