/* Copyright (C) 2013-2024 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
28 #ifndef _AVX512ERINTRIN_H_INCLUDED
29 #define _AVX512ERINTRIN_H_INCLUDED
32 #pragma GCC push_options
33 #pragma GCC target("avx512er,evex512")
34 #define __DISABLE_AVX512ER__
35 #endif /* __AVX512ER__ */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Write-mask types: one bit per vector element (8 doubles / 16 floats).  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
50 extern __inline __m512d
51 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
52 _mm512_exp2a23_round_pd (__m512d __A
, int __R
)
54 return (__m512d
) __builtin_ia32_exp2pd_mask ((__v8df
) __A
,
55 (__v8df
) _mm512_undefined_pd (),
59 extern __inline __m512d
60 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
61 _mm512_mask_exp2a23_round_pd (__m512d __W
, __mmask8 __U
, __m512d __A
, int __R
)
63 return (__m512d
) __builtin_ia32_exp2pd_mask ((__v8df
) __A
,
68 extern __inline __m512d
69 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
70 _mm512_maskz_exp2a23_round_pd (__mmask8 __U
, __m512d __A
, int __R
)
72 return (__m512d
) __builtin_ia32_exp2pd_mask ((__v8df
) __A
,
73 (__v8df
) _mm512_setzero_pd (),
77 extern __inline __m512
78 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
79 _mm512_exp2a23_round_ps (__m512 __A
, int __R
)
81 return (__m512
) __builtin_ia32_exp2ps_mask ((__v16sf
) __A
,
82 (__v16sf
) _mm512_undefined_ps (),
86 extern __inline __m512
87 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
88 _mm512_mask_exp2a23_round_ps (__m512 __W
, __mmask16 __U
, __m512 __A
, int __R
)
90 return (__m512
) __builtin_ia32_exp2ps_mask ((__v16sf
) __A
,
92 (__mmask16
) __U
, __R
);
95 extern __inline __m512
96 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
97 _mm512_maskz_exp2a23_round_ps (__mmask16 __U
, __m512 __A
, int __R
)
99 return (__m512
) __builtin_ia32_exp2ps_mask ((__v16sf
) __A
,
100 (__v16sf
) _mm512_setzero_ps (),
101 (__mmask16
) __U
, __R
);
104 extern __inline __m512d
105 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
106 _mm512_rcp28_round_pd (__m512d __A
, int __R
)
108 return (__m512d
) __builtin_ia32_rcp28pd_mask ((__v8df
) __A
,
109 (__v8df
) _mm512_undefined_pd (),
113 extern __inline __m512d
114 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
115 _mm512_mask_rcp28_round_pd (__m512d __W
, __mmask8 __U
, __m512d __A
, int __R
)
117 return (__m512d
) __builtin_ia32_rcp28pd_mask ((__v8df
) __A
,
119 (__mmask8
) __U
, __R
);
122 extern __inline __m512d
123 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
124 _mm512_maskz_rcp28_round_pd (__mmask8 __U
, __m512d __A
, int __R
)
126 return (__m512d
) __builtin_ia32_rcp28pd_mask ((__v8df
) __A
,
127 (__v8df
) _mm512_setzero_pd (),
128 (__mmask8
) __U
, __R
);
131 extern __inline __m512
132 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
133 _mm512_rcp28_round_ps (__m512 __A
, int __R
)
135 return (__m512
) __builtin_ia32_rcp28ps_mask ((__v16sf
) __A
,
136 (__v16sf
) _mm512_undefined_ps (),
137 (__mmask16
) -1, __R
);
140 extern __inline __m512
141 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
142 _mm512_mask_rcp28_round_ps (__m512 __W
, __mmask16 __U
, __m512 __A
, int __R
)
144 return (__m512
) __builtin_ia32_rcp28ps_mask ((__v16sf
) __A
,
146 (__mmask16
) __U
, __R
);
149 extern __inline __m512
150 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
151 _mm512_maskz_rcp28_round_ps (__mmask16 __U
, __m512 __A
, int __R
)
153 return (__m512
) __builtin_ia32_rcp28ps_mask ((__v16sf
) __A
,
154 (__v16sf
) _mm512_setzero_ps (),
155 (__mmask16
) __U
, __R
);
158 extern __inline __m128d
159 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
160 _mm_rcp28_round_sd (__m128d __A
, __m128d __B
, int __R
)
162 return (__m128d
) __builtin_ia32_rcp28sd_round ((__v2df
) __B
,
167 extern __inline __m128d
168 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
169 _mm_mask_rcp28_round_sd (__m128d __W
, __mmask8 __U
, __m128d __A
,
170 __m128d __B
, int __R
)
172 return (__m128d
) __builtin_ia32_rcp28sd_mask_round ((__v2df
) __B
,
179 extern __inline __m128d
180 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
181 _mm_maskz_rcp28_round_sd (__mmask8 __U
, __m128d __A
, __m128d __B
, int __R
)
183 return (__m128d
) __builtin_ia32_rcp28sd_mask_round ((__v2df
) __B
,
191 extern __inline __m128
192 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
193 _mm_rcp28_round_ss (__m128 __A
, __m128 __B
, int __R
)
195 return (__m128
) __builtin_ia32_rcp28ss_round ((__v4sf
) __B
,
200 extern __inline __m128
201 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
202 _mm_mask_rcp28_round_ss (__m128 __W
, __mmask8 __U
, __m128 __A
,
205 return (__m128
) __builtin_ia32_rcp28ss_mask_round ((__v4sf
) __B
,
212 extern __inline __m128
213 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
214 _mm_maskz_rcp28_round_ss (__mmask8 __U
, __m128 __A
, __m128 __B
, int __R
)
216 return (__m128
) __builtin_ia32_rcp28ss_mask_round ((__v4sf
) __B
,
224 extern __inline __m512d
225 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
226 _mm512_rsqrt28_round_pd (__m512d __A
, int __R
)
228 return (__m512d
) __builtin_ia32_rsqrt28pd_mask ((__v8df
) __A
,
229 (__v8df
) _mm512_undefined_pd (),
233 extern __inline __m512d
234 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
235 _mm512_mask_rsqrt28_round_pd (__m512d __W
, __mmask8 __U
, __m512d __A
, int __R
)
237 return (__m512d
) __builtin_ia32_rsqrt28pd_mask ((__v8df
) __A
,
239 (__mmask8
) __U
, __R
);
242 extern __inline __m512d
243 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
244 _mm512_maskz_rsqrt28_round_pd (__mmask8 __U
, __m512d __A
, int __R
)
246 return (__m512d
) __builtin_ia32_rsqrt28pd_mask ((__v8df
) __A
,
247 (__v8df
) _mm512_setzero_pd (),
248 (__mmask8
) __U
, __R
);
251 extern __inline __m512
252 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
253 _mm512_rsqrt28_round_ps (__m512 __A
, int __R
)
255 return (__m512
) __builtin_ia32_rsqrt28ps_mask ((__v16sf
) __A
,
256 (__v16sf
) _mm512_undefined_ps (),
257 (__mmask16
) -1, __R
);
260 extern __inline __m512
261 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
262 _mm512_mask_rsqrt28_round_ps (__m512 __W
, __mmask16 __U
, __m512 __A
, int __R
)
264 return (__m512
) __builtin_ia32_rsqrt28ps_mask ((__v16sf
) __A
,
266 (__mmask16
) __U
, __R
);
269 extern __inline __m512
270 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
271 _mm512_maskz_rsqrt28_round_ps (__mmask16 __U
, __m512 __A
, int __R
)
273 return (__m512
) __builtin_ia32_rsqrt28ps_mask ((__v16sf
) __A
,
274 (__v16sf
) _mm512_setzero_ps (),
275 (__mmask16
) __U
, __R
);
278 extern __inline __m128d
279 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
280 _mm_rsqrt28_round_sd (__m128d __A
, __m128d __B
, int __R
)
282 return (__m128d
) __builtin_ia32_rsqrt28sd_round ((__v2df
) __B
,
287 extern __inline __m128d
288 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
289 _mm_mask_rsqrt28_round_sd (__m128d __W
, __mmask8 __U
, __m128d __A
,
290 __m128d __B
, int __R
)
292 return (__m128d
) __builtin_ia32_rsqrt28sd_mask_round ((__v2df
) __B
,
299 extern __inline __m128d
300 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
301 _mm_maskz_rsqrt28_round_sd (__mmask8 __U
, __m128d __A
, __m128d __B
, int __R
)
303 return (__m128d
) __builtin_ia32_rsqrt28sd_mask_round ((__v2df
) __B
,
311 extern __inline __m128
312 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
313 _mm_rsqrt28_round_ss (__m128 __A
, __m128 __B
, int __R
)
315 return (__m128
) __builtin_ia32_rsqrt28ss_round ((__v4sf
) __B
,
320 extern __inline __m128
321 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
322 _mm_mask_rsqrt28_round_ss (__m128 __W
, __mmask8 __U
, __m128 __A
,
325 return (__m128
) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf
) __B
,
332 extern __inline __m128
333 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
334 _mm_maskz_rsqrt28_round_ss (__mmask8 __U
, __m128 __A
, __m128 __B
, int __R
)
336 return (__m128
) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf
) __B
,
/* Macro forms of the 512-bit round intrinsics, used when the rounding
   argument cannot be folded as a constant into the inline functions.
   NOTE(review): upstream guards the inline definitions above with
   #ifdef __OPTIMIZE__ and places these macros in the #else branch; the
   conditional directives were lost in extraction — restore them when
   merging.  */
#define _mm512_exp2a23_round_pd(A, C)            \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
    __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C)            \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
    __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C)            \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
    __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C)            \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
    __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C)            \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C)            \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
/* Macro forms of the scalar round intrinsics.  The maskz macros had their
   trailing "(U), (R))" continuation lines dropped in extraction; restored
   here.  NOTE(review): the (A), (B) operand order in these macros differs
   from the inline __OPTIMIZE__ versions above, which pass __B before __A —
   verify against the builtin signatures when merging.  */
#define _mm_rcp28_round_sd(A, B, R)    \
    __builtin_ia32_rcp28sd_round(A, B, R)

#define _mm_mask_rcp28_round_sd(W, U, A, B, R)    \
    __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rcp28_round_sd(U, A, B, R)    \
    __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), \
                                       (U), (R))

#define _mm_rcp28_round_ss(A, B, R)    \
    __builtin_ia32_rcp28ss_round(A, B, R)

#define _mm_mask_rcp28_round_ss(W, U, A, B, R)    \
    __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rcp28_round_ss(U, A, B, R)    \
    __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), \
                                       (U), (R))

#define _mm_rsqrt28_round_sd(A, B, R)    \
    __builtin_ia32_rsqrt28sd_round(A, B, R)

#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R)    \
    __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_sd(U, A, B, R)    \
    __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (),\
                                         (U), (R))

#define _mm_rsqrt28_round_ss(A, B, R)    \
    __builtin_ia32_rsqrt28ss_round(A, B, R)

#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R)    \
    __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_ss(U, A, B, R)    \
    __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (),\
                                         (U), (R))
/* Convenience forms that use the current rounding direction
   (_MM_FROUND_CUR_DIRECTION) instead of an explicit round argument.  */
#define _mm_mask_rcp28_sd(W, U, A, B)\
    _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(U, A, B)\
    _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(W, U, A, B)\
    _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(U, A, B)\
    _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(W, U, A, B)\
    _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(U, A, B)\
    _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(W, U, A, B)\
    _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(U, A, B)\
    _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_pd(A) \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A) \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A) \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A) \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A) \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A) \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A) \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A) \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A) \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A) \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A) \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A) \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A) \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A) \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A) \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A) \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A) \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A) \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

/* Unrounded scalar forms; note these call the builtin directly with the
   operand order (B, A), matching the inline round versions.  */
#define _mm_rcp28_sd(A, B) \
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B) \
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B) \
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B) \
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
531 #ifdef __DISABLE_AVX512ER__
532 #undef __DISABLE_AVX512ER__
533 #pragma GCC pop_options
534 #endif /* __DISABLE_AVX512ER__ */
536 #endif /* _AVX512ERINTRIN_H_INCLUDED */