]>
git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/avx512bf16vlintrin.h
/* Copyright (C) 2019-2023 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
28 #ifndef _AVX512BF16VLINTRIN_H_INCLUDED
29 #define _AVX512BF16VLINTRIN_H_INCLUDED
31 #if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
32 #pragma GCC push_options
33 #pragma GCC target("avx512bf16,avx512vl")
34 #define __DISABLE_AVX512BF16VL__
35 #endif /* __AVX512BF16__ */
37 /* Internal data types for implementing the intrinsics. */
38 typedef __bf16 __v16bf
__attribute__ ((__vector_size__ (32)));
39 typedef __bf16 __v8bf
__attribute__ ((__vector_size__ (16)));
41 /* The Intel API is flexible enough that we must allow aliasing with other
42 vector types, and their scalar components. */
43 typedef __bf16 __m256bh
__attribute__ ((__vector_size__ (32), __may_alias__
));
44 typedef __bf16 __m128bh
__attribute__ ((__vector_size__ (16), __may_alias__
));
46 typedef __bf16 __bfloat16
;
/* Unmasked forms of the cvtneps2bf16 builtins for 8-float and 4-float
   inputs; both yield a __m128bh.  The expansion is fully parenthesized so
   that a postfix operator written after the macro invocation applies to the
   cast result rather than to the builtin call.  */
#define _mm256_cvtneps_pbh(A) \
  ((__m128bh) __builtin_ia32_cvtneps2bf16_v8sf (A))
#define _mm_cvtneps_pbh(A) \
  ((__m128bh) __builtin_ia32_cvtneps2bf16_v4sf (A))
55 extern __inline __m256bh
56 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
57 _mm256_cvtne2ps_pbh (__m256 __A
, __m256 __B
)
59 return (__m256bh
)__builtin_ia32_cvtne2ps2bf16_v16bf(__A
, __B
);
62 extern __inline __m256bh
63 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
64 _mm256_mask_cvtne2ps_pbh (__m256bh __A
, __mmask16 __B
, __m256 __C
, __m256 __D
)
66 return (__m256bh
)__builtin_ia32_cvtne2ps2bf16_v16bf_mask(__C
, __D
, __A
, __B
);
69 extern __inline __m256bh
70 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
71 _mm256_maskz_cvtne2ps_pbh (__mmask16 __A
, __m256 __B
, __m256 __C
)
73 return (__m256bh
)__builtin_ia32_cvtne2ps2bf16_v16bf_maskz(__B
, __C
, __A
);
76 extern __inline __m128bh
77 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
78 _mm_cvtne2ps_pbh (__m128 __A
, __m128 __B
)
80 return (__m128bh
)__builtin_ia32_cvtne2ps2bf16_v8bf(__A
, __B
);
83 extern __inline __m128bh
84 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
85 _mm_mask_cvtne2ps_pbh (__m128bh __A
, __mmask8 __B
, __m128 __C
, __m128 __D
)
87 return (__m128bh
)__builtin_ia32_cvtne2ps2bf16_v8bf_mask(__C
, __D
, __A
, __B
);
90 extern __inline __m128bh
91 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
92 _mm_maskz_cvtne2ps_pbh (__mmask8 __A
, __m128 __B
, __m128 __C
)
94 return (__m128bh
)__builtin_ia32_cvtne2ps2bf16_v8bf_maskz(__B
, __C
, __A
);
99 extern __inline __m128bh
100 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
101 _mm256_mask_cvtneps_pbh (__m128bh __A
, __mmask8 __B
, __m256 __C
)
103 return (__m128bh
)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C
, __A
, __B
);
106 extern __inline __m128bh
107 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
108 _mm256_maskz_cvtneps_pbh (__mmask8 __A
, __m256 __B
)
110 return (__m128bh
)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B
, __A
);
113 extern __inline __m128bh
114 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
115 _mm_mask_cvtneps_pbh (__m128bh __A
, __mmask8 __B
, __m128 __C
)
117 return (__m128bh
)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C
, __A
, __B
);
120 extern __inline __m128bh
121 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
122 _mm_maskz_cvtneps_pbh (__mmask8 __A
, __m128 __B
)
124 return (__m128bh
)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B
, __A
);
129 extern __inline __m256
130 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
131 _mm256_dpbf16_ps (__m256 __A
, __m256bh __B
, __m256bh __C
)
133 return (__m256
)__builtin_ia32_dpbf16ps_v8sf(__A
, __B
, __C
);
136 extern __inline __m256
137 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
138 _mm256_mask_dpbf16_ps (__m256 __A
, __mmask8 __B
, __m256bh __C
, __m256bh __D
)
140 return (__m256
)__builtin_ia32_dpbf16ps_v8sf_mask(__A
, __C
, __D
, __B
);
143 extern __inline __m256
144 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
145 _mm256_maskz_dpbf16_ps (__mmask8 __A
, __m256 __B
, __m256bh __C
, __m256bh __D
)
147 return (__m256
)__builtin_ia32_dpbf16ps_v8sf_maskz(__B
, __C
, __D
, __A
);
150 extern __inline __m128
151 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
152 _mm_dpbf16_ps (__m128 __A
, __m128bh __B
, __m128bh __C
)
154 return (__m128
)__builtin_ia32_dpbf16ps_v4sf(__A
, __B
, __C
);
157 extern __inline __m128
158 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
159 _mm_mask_dpbf16_ps (__m128 __A
, __mmask8 __B
, __m128bh __C
, __m128bh __D
)
161 return (__m128
)__builtin_ia32_dpbf16ps_v4sf_mask(__A
, __C
, __D
, __B
);
164 extern __inline __m128
165 __attribute__((__gnu_inline__
, __always_inline__
, __artificial__
))
166 _mm_maskz_dpbf16_ps (__mmask8 __A
, __m128 __B
, __m128bh __C
, __m128bh __D
)
168 return (__m128
)__builtin_ia32_dpbf16ps_v4sf_maskz(__B
, __C
, __D
, __A
);
171 extern __inline __bf16
172 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
173 _mm_cvtness_sbh (float __A
)
175 __v4sf __V
= {__A
, 0, 0, 0};
176 __v8bf __R
= __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf
)__V
,
177 (__v8bf
)_mm_undefined_si128 (), (__mmask8
)-1);
181 extern __inline __m128
182 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
183 _mm_cvtpbh_ps (__m128bh __A
)
185 return (__m128
)_mm_castsi128_ps ((__m128i
)_mm_slli_epi32 (
186 (__m128i
)_mm_cvtepi16_epi32 ((__m128i
)__A
), 16));
189 extern __inline __m256
190 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
191 _mm256_cvtpbh_ps (__m128bh __A
)
193 return (__m256
)_mm256_castsi256_ps ((__m256i
)_mm256_slli_epi32 (
194 (__m256i
)_mm256_cvtepi16_epi32 ((__m128i
)__A
), 16));
197 extern __inline __m128
198 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
199 _mm_maskz_cvtpbh_ps (__mmask8 __U
, __m128bh __A
)
201 return (__m128
)_mm_castsi128_ps ((__m128i
)_mm_slli_epi32 (
202 (__m128i
)_mm_maskz_cvtepi16_epi32 (
203 (__mmask8
)__U
, (__m128i
)__A
), 16));
206 extern __inline __m256
207 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
208 _mm256_maskz_cvtpbh_ps (__mmask8 __U
, __m128bh __A
)
210 return (__m256
)_mm256_castsi256_ps ((__m256i
)_mm256_slli_epi32 (
211 (__m256i
)_mm256_maskz_cvtepi16_epi32 (
212 (__mmask8
)__U
, (__m128i
)__A
), 16));
215 extern __inline __m128
216 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
217 _mm_mask_cvtpbh_ps (__m128 __S
, __mmask8 __U
, __m128bh __A
)
219 return (__m128
)_mm_castsi128_ps ((__m128i
)_mm_mask_slli_epi32 (
220 (__m128i
)__S
, (__mmask8
)__U
, (__m128i
)_mm_cvtepi16_epi32 (
224 extern __inline __m256
225 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
226 _mm256_mask_cvtpbh_ps (__m256 __S
, __mmask8 __U
, __m128bh __A
)
228 return (__m256
)_mm256_castsi256_ps ((__m256i
)_mm256_mask_slli_epi32 (
229 (__m256i
)__S
, (__mmask8
)__U
, (__m256i
)_mm256_cvtepi16_epi32 (
233 #ifdef __DISABLE_AVX512BF16VL__
234 #undef __DISABLE_AVX512BF16VL__
235 #pragma GCC pop_options
236 #endif /* __DISABLE_AVX512BF16VL__ */
238 #endif /* _AVX512BF16VLINTRIN_H_INCLUDED */