]> git.ipfire.org Git - thirdparty/gcc.git/blame - libstdc++-v3/include/experimental/bits/simd_detail.h
Update copyright years.
[thirdparty/gcc.git] / libstdc++-v3 / include / experimental / bits / simd_detail.h
CommitLineData
2bcceb6f
MK
1// Internal macros for the simd implementation -*- C++ -*-
2
a945c346 3// Copyright (C) 2020-2024 Free Software Foundation, Inc.
2bcceb6f
MK
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
26#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
27
28#if __cplusplus >= 201703L
29
30#include <cstddef>
31#include <cstdint>
32
6963c3b9 33/// @cond undocumented
2bcceb6f
MK
34
// Opens the scopes that the simd implementation is declared in, in this
// order: namespace std (annotated with _GLIBCXX_VISIBILITY(default)),
// whatever _GLIBCXX_BEGIN_NAMESPACE_VERSION expands to (defined elsewhere),
// namespace experimental, and the inline namespace parallelism_v2.
// Every use has to be matched by _GLIBCXX_SIMD_END_NAMESPACE.
35#define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
36 namespace std _GLIBCXX_VISIBILITY(default) \
37 { \
38 _GLIBCXX_BEGIN_NAMESPACE_VERSION \
39 namespace experimental { \
40 inline namespace parallelism_v2 {
// Closes, innermost first, the scopes opened by _GLIBCXX_SIMD_BEGIN_NAMESPACE:
// parallelism_v2, experimental, the versioning macro, and finally std.
41#define _GLIBCXX_SIMD_END_NAMESPACE \
42 } \
43 } \
44 _GLIBCXX_END_NAMESPACE_VERSION \
45 }
46
47// ISA extension detection. The following defines all the _GLIBCXX_SIMD_HAVE_XXX
48// macros; each is defined to 1 when its test succeeds and to 0 otherwise. ARM{{{
// HAVE_NEON: 1 when __ARM_NEON is defined.
49#if defined __ARM_NEON
50#define _GLIBCXX_SIMD_HAVE_NEON 1
51#else
52#define _GLIBCXX_SIMD_HAVE_NEON 0
53#endif
// HAVE_NEON_A32: 1 when __ARM_NEON is defined and, in addition, either
// __ARM_ARCH is at least 8 or __aarch64__ is defined.
54#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
55#define _GLIBCXX_SIMD_HAVE_NEON_A32 1
56#else
57#define _GLIBCXX_SIMD_HAVE_NEON_A32 0
58#endif
// HAVE_NEON_A64: 1 when both __ARM_NEON and __aarch64__ are defined.
59#if defined __ARM_NEON && defined __aarch64__
60#define _GLIBCXX_SIMD_HAVE_NEON_A64 1
61#else
62#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
63#endif
64//}}}
65// x86{{{
// Each macro in this section mirrors one compiler feature-test macro:
// it is 1 when that macro is defined and 0 otherwise.
66#ifdef __MMX__
67#define _GLIBCXX_SIMD_HAVE_MMX 1
68#else
69#define _GLIBCXX_SIMD_HAVE_MMX 0
70#endif
// SSE and SSE2 are additionally reported as available whenever __x86_64__
// is defined, independent of __SSE__ / __SSE2__.
71#if defined __SSE__ || defined __x86_64__
72#define _GLIBCXX_SIMD_HAVE_SSE 1
73#else
74#define _GLIBCXX_SIMD_HAVE_SSE 0
75#endif
76#if defined __SSE2__ || defined __x86_64__
77#define _GLIBCXX_SIMD_HAVE_SSE2 1
78#else
79#define _GLIBCXX_SIMD_HAVE_SSE2 0
80#endif
81#ifdef __SSE3__
82#define _GLIBCXX_SIMD_HAVE_SSE3 1
83#else
84#define _GLIBCXX_SIMD_HAVE_SSE3 0
85#endif
86#ifdef __SSSE3__
87#define _GLIBCXX_SIMD_HAVE_SSSE3 1
88#else
89#define _GLIBCXX_SIMD_HAVE_SSSE3 0
90#endif
91#ifdef __SSE4_1__
92#define _GLIBCXX_SIMD_HAVE_SSE4_1 1
93#else
94#define _GLIBCXX_SIMD_HAVE_SSE4_1 0
95#endif
96#ifdef __SSE4_2__
97#define _GLIBCXX_SIMD_HAVE_SSE4_2 1
98#else
99#define _GLIBCXX_SIMD_HAVE_SSE4_2 0
100#endif
101#ifdef __XOP__
102#define _GLIBCXX_SIMD_HAVE_XOP 1
103#else
104#define _GLIBCXX_SIMD_HAVE_XOP 0
105#endif
106#ifdef __AVX__
107#define _GLIBCXX_SIMD_HAVE_AVX 1
108#else
109#define _GLIBCXX_SIMD_HAVE_AVX 0
110#endif
111#ifdef __AVX2__
112#define _GLIBCXX_SIMD_HAVE_AVX2 1
113#else
114#define _GLIBCXX_SIMD_HAVE_AVX2 0
115#endif
// Note the naming difference: the library macro is called BMI1 but the
// compiler macro that is tested is __BMI__.
116#ifdef __BMI__
117#define _GLIBCXX_SIMD_HAVE_BMI1 1
118#else
119#define _GLIBCXX_SIMD_HAVE_BMI1 0
120#endif
121#ifdef __BMI2__
122#define _GLIBCXX_SIMD_HAVE_BMI2 1
123#else
124#define _GLIBCXX_SIMD_HAVE_BMI2 0
125#endif
126#ifdef __LZCNT__
127#define _GLIBCXX_SIMD_HAVE_LZCNT 1
128#else
129#define _GLIBCXX_SIMD_HAVE_LZCNT 0
130#endif
131#ifdef __SSE4A__
132#define _GLIBCXX_SIMD_HAVE_SSE4A 1
133#else
134#define _GLIBCXX_SIMD_HAVE_SSE4A 0
135#endif
136#ifdef __FMA__
137#define _GLIBCXX_SIMD_HAVE_FMA 1
138#else
139#define _GLIBCXX_SIMD_HAVE_FMA 0
140#endif
141#ifdef __FMA4__
142#define _GLIBCXX_SIMD_HAVE_FMA4 1
143#else
144#define _GLIBCXX_SIMD_HAVE_FMA4 0
145#endif
146#ifdef __F16C__
147#define _GLIBCXX_SIMD_HAVE_F16C 1
148#else
149#define _GLIBCXX_SIMD_HAVE_F16C 0
150#endif
151#ifdef __POPCNT__
152#define _GLIBCXX_SIMD_HAVE_POPCNT 1
153#else
154#define _GLIBCXX_SIMD_HAVE_POPCNT 0
155#endif
156#ifdef __AVX512F__
157#define _GLIBCXX_SIMD_HAVE_AVX512F 1
158#else
159#define _GLIBCXX_SIMD_HAVE_AVX512F 0
160#endif
161#ifdef __AVX512DQ__
162#define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
163#else
164#define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
165#endif
166#ifdef __AVX512VL__
167#define _GLIBCXX_SIMD_HAVE_AVX512VL 1
168#else
169#define _GLIBCXX_SIMD_HAVE_AVX512VL 0
170#endif
171#ifdef __AVX512BW__
172#define _GLIBCXX_SIMD_HAVE_AVX512BW 1
173#else
174#define _GLIBCXX_SIMD_HAVE_AVX512BW 0
175#endif
52d28210
MK
// Further AVX-512 sub-extensions. Same scheme as the macros before them:
// 1 when the correspondingly named compiler macro is defined, 0 otherwise.
176#ifdef __AVX512BITALG__
177#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1
178#else
179#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0
180#endif
181#ifdef __AVX512VBMI2__
182#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1
183#else
184#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0
185#endif
186#ifdef __AVX512VBMI__
187#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1
188#else
189#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0
190#endif
191#ifdef __AVX512IFMA__
192#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1
193#else
194#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0
195#endif
196#ifdef __AVX512CD__
197#define _GLIBCXX_SIMD_HAVE_AVX512CD 1
198#else
199#define _GLIBCXX_SIMD_HAVE_AVX512CD 0
200#endif
201#ifdef __AVX512VNNI__
202#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1
203#else
204#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0
205#endif
206#ifdef __AVX512VPOPCNTDQ__
207#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1
208#else
209#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0
210#endif
211#ifdef __AVX512VP2INTERSECT__
212#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1
213#else
214#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0
215#endif
2bcceb6f
MK
216
// ABI availability macros, derived from the feature macros defined earlier.
// For each register family there is a basic macro and a FULL macro, each
// 1 or 0:
//   SSE_ABI    <- HAVE_SSE       FULL_SSE_ABI    <- HAVE_SSE2
//   AVX_ABI    <- HAVE_AVX       FULL_AVX_ABI    <- HAVE_AVX2
//   AVX512_ABI <- HAVE_AVX512F   FULL_AVX512_ABI <- HAVE_AVX512BW
217#if _GLIBCXX_SIMD_HAVE_SSE
218#define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
219#else
220#define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
221#endif
222#if _GLIBCXX_SIMD_HAVE_SSE2
223#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
224#else
225#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
226#endif
227
228#if _GLIBCXX_SIMD_HAVE_AVX
229#define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
230#else
231#define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
232#endif
233#if _GLIBCXX_SIMD_HAVE_AVX2
234#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
235#else
236#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
237#endif
238
239#if _GLIBCXX_SIMD_HAVE_AVX512F
240#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
241#else
242#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
243#endif
244#if _GLIBCXX_SIMD_HAVE_AVX512BW
245#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
246#else
247#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
248#endif
249
// Sanity check: stop compilation when targeting x86-64 without SSE2.
// NOTE(review): _GLIBCXX_SIMD_HAVE_SSE2 is defined to 1 whenever __x86_64__
// is defined (see its definition earlier in this file), so as written this
// condition looks like it can never be true -- confirm whether that is
// intended.
250#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
251#error "Use of SSE2 is required on AMD64"
252#endif
253//}}}
254
// Compiler-dependent attribute macros.
// With clang both _GLIBCXX_SIMD_NORMAL_MATH and
// _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA expand to nothing. Otherwise
// NORMAL_MATH requests the finite-math-only and no-signed-zeros
// optimizations through the gnu optimize attribute, and ALWAYS_INLINE_LAMBDA
// is the GNU-syntax always_inline attribute.
255#ifdef __clang__
256#define _GLIBCXX_SIMD_NORMAL_MATH
92c47b15 257#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2bcceb6f
MK
258#else
259#define _GLIBCXX_SIMD_NORMAL_MATH \
260 [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
92c47b15 261#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
2bcceb6f
MK
262#endif
// Inlining control for function declarations. INTRINSIC and ALWAYS_INLINE
// both include the inline keyword; INTRINSIC additionally carries the
// artificial attribute. These two may be redefined further down when
// _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined.
263#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
264#define _GLIBCXX_SIMD_INTRINSIC \
265 [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
266#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
// Branch prediction hints: forward __x to __builtin_expect with an expected
// value of 0 (unlikely) or 1 (likely). __x is passed through unchanged.
267#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
268#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
269
// constexpr selection.
// When __STRICT_ANSI__ is nonzero or the compiler is clang,
// _GLIBCXX_SIMD_CONSTEXPR expands to nothing and
// _GLIBCXX_SIMD_USE_CONSTEXPR_API falls back to const; otherwise both expand
// to constexpr.
8ff3ca2d 270#if __STRICT_ANSI__ || defined __clang__
2bcceb6f
MK
271#define _GLIBCXX_SIMD_CONSTEXPR
272#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
273#else
274#define _GLIBCXX_SIMD_CONSTEXPR constexpr
275#define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
276#endif
277
// _GLIBCXX_SIMD_USE_CONSTEXPR depends on the compiler only: const with
// clang, constexpr otherwise (__STRICT_ANSI__ is not consulted here).
278#if defined __clang__
279#define _GLIBCXX_SIMD_USE_CONSTEXPR const
280#else
281#define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
282#endif
283
// Operator lists. Each LIST macro invokes the function-like macro passed as
// __macro once per operator token:
//   LIST_BINARY:      |  &  ^
//   LIST_SHIFTS:      << >>
//   LIST_ARITHMETICS: +  -  *  /  %
284#define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
285#define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
286#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
287 __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)
288
// The ALL variants expand the matching LIST macro followed by
// static_assert(true) -- presumably so that a use of the macro can be
// terminated with a semicolon; the expansion of __macro itself is up to the
// caller.
289#define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
290 _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
291#define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
292 _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
293#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
294 _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)
295
// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, the
// forced-inlining macros are replaced by weaker versions. ALWAYS_INLINE and
// INTRINSIC become a plain inline keyword (INTRINSIC thereby also loses the
// artificial attribute) and ALWAYS_INLINE_LAMBDA becomes empty.
296#ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
297#undef _GLIBCXX_SIMD_ALWAYS_INLINE
298#define _GLIBCXX_SIMD_ALWAYS_INLINE inline
53b55701
MK
299#undef _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
300#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2bcceb6f
MK
301#undef _GLIBCXX_SIMD_INTRINSIC
302#define _GLIBCXX_SIMD_INTRINSIC inline
303#endif
304
// _GLIBCXX_SIMD_X86INTRIN is 1 when at least one of the SSE or MMX feature
// macros is set, and 0 otherwise. The x86-specific workaround macros later
// in this file are keyed on it.
305#if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
306#define _GLIBCXX_SIMD_X86INTRIN 1
307#else
308#define _GLIBCXX_SIMD_X86INTRIN 0
309#endif
310
311// workaround macros {{{
// Unlike the HAVE_XXX macros, the macros in this section are either defined
// to 1 or left undefined; none of them has a 0 fallback.
312// Use aliasing loads to help GCC understand the data accesses better.
313// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
314// fixed_size_simd<float, 16> x.
315#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1
316
317// Vector conversions on x86 are not optimized (only set with x86 intrinsics):
318#if _GLIBCXX_SIMD_X86INTRIN
319#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
320#endif
321
322// Integer division is not optimized (set for every compiler except clang).
403e48ef 323#ifndef __clang__
2bcceb6f 324#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
403e48ef 325#endif
2bcceb6f
MK
326
327// Very bad codegen for extraction and concatenation of 128/256 "subregisters"
328// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
329#if _GLIBCXX_SIMD_X86INTRIN
330#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
331#endif
332
333// Bad codegen for 8 Byte memcpy to __vector_type_t<char, 16> (always set).
334#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1
335
336// Bad codegen for zero-extend using simple concat(__x, 0).
337#if _GLIBCXX_SIMD_X86INTRIN
338#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
339#endif
340
341// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
342// of static_simd_cast)
343#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1
344
345// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
346// constraint on (static)_simd_cast)
347#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
348// }}}
349
6963c3b9
JW
350/// @endcond
351
2bcceb6f
MK
352#endif // __cplusplus >= 201703L
353#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
354
355// vim: foldmethod=marker