]>
Commit | Line | Data |
---|---|---|
2bcceb6f MK |
1 | // Internal macros for the simd implementation -*- C++ -*- |
2 | ||
83ffe9cd | 3 | // Copyright (C) 2020-2023 Free Software Foundation, Inc. |
2bcceb6f MK |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_ | |
26 | #define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_ | |
27 | ||
28 | #if __cplusplus >= 201703L | |
29 | ||
30 | #include <cstddef> | |
31 | #include <cstdint> | |
32 | ||
6963c3b9 | 33 | /// @cond undocumented |
2bcceb6f MK |
34 | |
35 | #define _GLIBCXX_SIMD_BEGIN_NAMESPACE \ | |
36 | namespace std _GLIBCXX_VISIBILITY(default) \ | |
37 | { \ | |
38 | _GLIBCXX_BEGIN_NAMESPACE_VERSION \ | |
39 | namespace experimental { \ | |
40 | inline namespace parallelism_v2 { | |
41 | #define _GLIBCXX_SIMD_END_NAMESPACE \ | |
42 | } \ | |
43 | } \ | |
44 | _GLIBCXX_END_NAMESPACE_VERSION \ | |
45 | } | |
46 | ||
47 | // ISA extension detection. Every _GLIBCXX_SIMD_HAVE_XXX macro defined below is always set, to either 1 or 0. | |
48 | // ARM: NEON (__ARM_NEON defined), NEON_A32 (NEON and __ARM_ARCH >= 8 or __aarch64__), NEON_A64 (NEON and __aarch64__) {{{ | |
49 | #if defined __ARM_NEON | |
50 | #define _GLIBCXX_SIMD_HAVE_NEON 1 | |
51 | #else | |
52 | #define _GLIBCXX_SIMD_HAVE_NEON 0 | |
53 | #endif | |
54 | #if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__) | |
55 | #define _GLIBCXX_SIMD_HAVE_NEON_A32 1 | |
56 | #else | |
57 | #define _GLIBCXX_SIMD_HAVE_NEON_A32 0 | |
58 | #endif | |
59 | #if defined __ARM_NEON && defined __aarch64__ | |
60 | #define _GLIBCXX_SIMD_HAVE_NEON_A64 1 | |
61 | #else | |
62 | #define _GLIBCXX_SIMD_HAVE_NEON_A64 0 | |
63 | #endif | |
64 | //}}} | |
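65 | // x86: each feature macro is 1 when the matching target macro (__SSE__, __AVX2__, ...) is defined, else 0; SSE and SSE2 are also 1 whenever __x86_64__ is defined; the *_ABI macros are derived from the feature macros {{{ | |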
66 | #ifdef __MMX__ | |
67 | #define _GLIBCXX_SIMD_HAVE_MMX 1 | |
68 | #else | |
69 | #define _GLIBCXX_SIMD_HAVE_MMX 0 | |
70 | #endif | |
71 | #if defined __SSE__ || defined __x86_64__ | |
72 | #define _GLIBCXX_SIMD_HAVE_SSE 1 | |
73 | #else | |
74 | #define _GLIBCXX_SIMD_HAVE_SSE 0 | |
75 | #endif | |
76 | #if defined __SSE2__ || defined __x86_64__ | |
77 | #define _GLIBCXX_SIMD_HAVE_SSE2 1 | |
78 | #else | |
79 | #define _GLIBCXX_SIMD_HAVE_SSE2 0 | |
80 | #endif | |
81 | #ifdef __SSE3__ | |
82 | #define _GLIBCXX_SIMD_HAVE_SSE3 1 | |
83 | #else | |
84 | #define _GLIBCXX_SIMD_HAVE_SSE3 0 | |
85 | #endif | |
86 | #ifdef __SSSE3__ | |
87 | #define _GLIBCXX_SIMD_HAVE_SSSE3 1 | |
88 | #else | |
89 | #define _GLIBCXX_SIMD_HAVE_SSSE3 0 | |
90 | #endif | |
91 | #ifdef __SSE4_1__ | |
92 | #define _GLIBCXX_SIMD_HAVE_SSE4_1 1 | |
93 | #else | |
94 | #define _GLIBCXX_SIMD_HAVE_SSE4_1 0 | |
95 | #endif | |
96 | #ifdef __SSE4_2__ | |
97 | #define _GLIBCXX_SIMD_HAVE_SSE4_2 1 | |
98 | #else | |
99 | #define _GLIBCXX_SIMD_HAVE_SSE4_2 0 | |
100 | #endif | |
101 | #ifdef __XOP__ | |
102 | #define _GLIBCXX_SIMD_HAVE_XOP 1 | |
103 | #else | |
104 | #define _GLIBCXX_SIMD_HAVE_XOP 0 | |
105 | #endif | |
106 | #ifdef __AVX__ | |
107 | #define _GLIBCXX_SIMD_HAVE_AVX 1 | |
108 | #else | |
109 | #define _GLIBCXX_SIMD_HAVE_AVX 0 | |
110 | #endif | |
111 | #ifdef __AVX2__ | |
112 | #define _GLIBCXX_SIMD_HAVE_AVX2 1 | |
113 | #else | |
114 | #define _GLIBCXX_SIMD_HAVE_AVX2 0 | |
115 | #endif | |
116 | #ifdef __BMI__ | |
117 | #define _GLIBCXX_SIMD_HAVE_BMI1 1 | |
118 | #else | |
119 | #define _GLIBCXX_SIMD_HAVE_BMI1 0 | |
120 | #endif | |
121 | #ifdef __BMI2__ | |
122 | #define _GLIBCXX_SIMD_HAVE_BMI2 1 | |
123 | #else | |
124 | #define _GLIBCXX_SIMD_HAVE_BMI2 0 | |
125 | #endif | |
126 | #ifdef __LZCNT__ | |
127 | #define _GLIBCXX_SIMD_HAVE_LZCNT 1 | |
128 | #else | |
129 | #define _GLIBCXX_SIMD_HAVE_LZCNT 0 | |
130 | #endif | |
131 | #ifdef __SSE4A__ | |
132 | #define _GLIBCXX_SIMD_HAVE_SSE4A 1 | |
133 | #else | |
134 | #define _GLIBCXX_SIMD_HAVE_SSE4A 0 | |
135 | #endif | |
136 | #ifdef __FMA__ | |
137 | #define _GLIBCXX_SIMD_HAVE_FMA 1 | |
138 | #else | |
139 | #define _GLIBCXX_SIMD_HAVE_FMA 0 | |
140 | #endif | |
141 | #ifdef __FMA4__ | |
142 | #define _GLIBCXX_SIMD_HAVE_FMA4 1 | |
143 | #else | |
144 | #define _GLIBCXX_SIMD_HAVE_FMA4 0 | |
145 | #endif | |
146 | #ifdef __F16C__ | |
147 | #define _GLIBCXX_SIMD_HAVE_F16C 1 | |
148 | #else | |
149 | #define _GLIBCXX_SIMD_HAVE_F16C 0 | |
150 | #endif | |
151 | #ifdef __POPCNT__ | |
152 | #define _GLIBCXX_SIMD_HAVE_POPCNT 1 | |
153 | #else | |
154 | #define _GLIBCXX_SIMD_HAVE_POPCNT 0 | |
155 | #endif | |
156 | #ifdef __AVX512F__ | |
157 | #define _GLIBCXX_SIMD_HAVE_AVX512F 1 | |
158 | #else | |
159 | #define _GLIBCXX_SIMD_HAVE_AVX512F 0 | |
160 | #endif | |
161 | #ifdef __AVX512DQ__ | |
162 | #define _GLIBCXX_SIMD_HAVE_AVX512DQ 1 | |
163 | #else | |
164 | #define _GLIBCXX_SIMD_HAVE_AVX512DQ 0 | |
165 | #endif | |
166 | #ifdef __AVX512VL__ | |
167 | #define _GLIBCXX_SIMD_HAVE_AVX512VL 1 | |
168 | #else | |
169 | #define _GLIBCXX_SIMD_HAVE_AVX512VL 0 | |
170 | #endif | |
171 | #ifdef __AVX512BW__ | |
172 | #define _GLIBCXX_SIMD_HAVE_AVX512BW 1 | |
173 | #else | |
174 | #define _GLIBCXX_SIMD_HAVE_AVX512BW 0 | |
175 | #endif | |
52d28210 MK |
176 | #ifdef __AVX512BITALG__ |
177 | #define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1 | |
178 | #else | |
179 | #define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0 | |
180 | #endif | |
181 | #ifdef __AVX512VBMI2__ | |
182 | #define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1 | |
183 | #else | |
184 | #define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0 | |
185 | #endif | |
186 | #ifdef __AVX512VBMI__ | |
187 | #define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1 | |
188 | #else | |
189 | #define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0 | |
190 | #endif | |
191 | #ifdef __AVX512IFMA__ | |
192 | #define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1 | |
193 | #else | |
194 | #define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0 | |
195 | #endif | |
196 | #ifdef __AVX512CD__ | |
197 | #define _GLIBCXX_SIMD_HAVE_AVX512CD 1 | |
198 | #else | |
199 | #define _GLIBCXX_SIMD_HAVE_AVX512CD 0 | |
200 | #endif | |
201 | #ifdef __AVX512VNNI__ | |
202 | #define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1 | |
203 | #else | |
204 | #define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0 | |
205 | #endif | |
206 | #ifdef __AVX512VPOPCNTDQ__ | |
207 | #define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1 | |
208 | #else | |
209 | #define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0 | |
210 | #endif | |
211 | #ifdef __AVX512VP2INTERSECT__ | |
212 | #define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1 | |
213 | #else | |
214 | #define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0 | |
215 | #endif | |
2bcceb6f MK |
216 | |
217 | #if _GLIBCXX_SIMD_HAVE_SSE | |
218 | #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1 | |
219 | #else | |
220 | #define _GLIBCXX_SIMD_HAVE_SSE_ABI 0 | |
221 | #endif | |
222 | #if _GLIBCXX_SIMD_HAVE_SSE2 | |
223 | #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1 | |
224 | #else | |
225 | #define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0 | |
226 | #endif | |
227 | ||
228 | #if _GLIBCXX_SIMD_HAVE_AVX | |
229 | #define _GLIBCXX_SIMD_HAVE_AVX_ABI 1 | |
230 | #else | |
231 | #define _GLIBCXX_SIMD_HAVE_AVX_ABI 0 | |
232 | #endif | |
233 | #if _GLIBCXX_SIMD_HAVE_AVX2 | |
234 | #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1 | |
235 | #else | |
236 | #define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0 | |
237 | #endif | |
238 | ||
239 | #if _GLIBCXX_SIMD_HAVE_AVX512F | |
240 | #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1 | |
241 | #else | |
242 | #define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0 | |
243 | #endif | |
244 | #if _GLIBCXX_SIMD_HAVE_AVX512BW | |
245 | #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1 | |
246 | #else | |
247 | #define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0 | |
248 | #endif | |
249 | ||
250 | #if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2 | |
251 | #error "Use of SSE2 is required on AMD64" | |
252 | #endif | |
253 | //}}} | |
254 | ||
255 | #ifdef __clang__ | |
256 | #define _GLIBCXX_SIMD_NORMAL_MATH | |
92c47b15 | 257 | #define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA |
2bcceb6f MK |
258 | #else |
259 | #define _GLIBCXX_SIMD_NORMAL_MATH \ | |
260 | [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]] | |
92c47b15 | 261 | #define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__)) |
2bcceb6f MK |
262 | #endif |
263 | #define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]] | |
264 | #define _GLIBCXX_SIMD_INTRINSIC \ | |
265 | [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline | |
266 | #define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline | |
267 | #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0) | |
268 | #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1) | |
269 | ||
8ff3ca2d | 270 | #if __STRICT_ANSI__ || defined __clang__ |
2bcceb6f MK |
271 | #define _GLIBCXX_SIMD_CONSTEXPR |
272 | #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const | |
273 | #else | |
274 | #define _GLIBCXX_SIMD_CONSTEXPR constexpr | |
275 | #define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr | |
276 | #endif | |
277 | ||
278 | #if defined __clang__ | |
279 | #define _GLIBCXX_SIMD_USE_CONSTEXPR const | |
280 | #else | |
281 | #define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr | |
282 | #endif | |
283 | ||
284 | #define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^) | |
285 | #define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>) | |
286 | #define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \ | |
287 | __macro(+) __macro(-) __macro(*) __macro(/) __macro(%) | |
288 | ||
289 | #define _GLIBCXX_SIMD_ALL_BINARY(__macro) \ | |
290 | _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true) | |
291 | #define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \ | |
292 | _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true) | |
293 | #define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \ | |
294 | _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true) | |
295 | ||
296 | #ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE | |
297 | #undef _GLIBCXX_SIMD_ALWAYS_INLINE | |
298 | #define _GLIBCXX_SIMD_ALWAYS_INLINE inline | |
53b55701 MK |
299 | #undef _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA |
300 | #define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA | |
2bcceb6f MK |
301 | #undef _GLIBCXX_SIMD_INTRINSIC |
302 | #define _GLIBCXX_SIMD_INTRINSIC inline | |
303 | #endif | |
304 | ||
305 | #if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX | |
306 | #define _GLIBCXX_SIMD_X86INTRIN 1 | |
307 | #else | |
308 | #define _GLIBCXX_SIMD_X86INTRIN 0 | |
309 | #endif | |
310 | ||
311 | // workaround macros: switches for code-generation workarounds and Parallelism TS issue fixes {{{ | |
312 | // Use aliasing loads to help GCC understand the data accesses better. | |
313 | // This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with | |
314 | // fixed_size_simd<float, 16> x. | |
315 | #define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1 | |
316 | ||
317 | // Vector conversions on x86 are not optimized (PR85048). Only defined when _GLIBCXX_SIMD_X86INTRIN is nonzero; otherwise left undefined: | |
318 | #if _GLIBCXX_SIMD_X86INTRIN | |
319 | #define _GLIBCXX_SIMD_WORKAROUND_PR85048 1 | |
320 | #endif | |
321 | ||
322 | // Integer division is not optimized (PR90993). | |
323 | #define _GLIBCXX_SIMD_WORKAROUND_PR90993 1 | |
324 | ||
325 | // Very bad codegen for extraction and concatenation of 128/256 "subregisters" | |
326 | // with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM (only defined when _GLIBCXX_SIMD_X86INTRIN is nonzero) | |
327 | #if _GLIBCXX_SIMD_X86INTRIN | |
328 | #define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1 | |
329 | #endif | |
330 | ||
331 | // Bad codegen for 8-byte memcpy to __vector_type_t<char, 16> (PR90424). | |
332 | #define _GLIBCXX_SIMD_WORKAROUND_PR90424 1 | |
333 | ||
334 | // Bad codegen for zero-extend using simple concat(__x, 0) (only defined when _GLIBCXX_SIMD_X86INTRIN is nonzero). | |
335 | #if _GLIBCXX_SIMD_X86INTRIN | |
336 | #define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1 | |
337 | #endif | |
338 | ||
339 | // Fix for https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type | |
340 | // of static_simd_cast). | |
341 | #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1 | |
342 | ||
343 | // Fix for https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE | |
344 | // constraint on (static)_simd_cast). | |
345 | #define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1 | |
346 | // }}} | |
347 | ||
6963c3b9 JW |
348 | /// @endcond |
349 | ||
2bcceb6f MK |
350 | #endif // __cplusplus >= 201703L |
351 | #endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_ | |
352 | ||
353 | // vim: foldmethod=marker |