// Simd fixed_size ABI specific implementations -*- C++ -*-

// Copyright (C) 2020-2024 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/*
 * The fixed_size ABI gives the following guarantees:
 *  - simd objects are passed via the stack
 *  - memory layout of `simd<_Tp, _Np>` is equivalent to `array<_Tp, _Np>`
 *  - alignment of `simd<_Tp, _Np>` is `_Np * sizeof(_Tp)` if _Np is __a
 *    power-of-2 value, otherwise `std::__bit_ceil(_Np * sizeof(_Tp))` (Note:
 *    if the alignment were to exceed the system/compiler maximum, it is
 *    bounded to that maximum)
 *  - simd_mask objects are passed like bitset<_Np>
 *  - memory layout of `simd_mask<_Tp, _Np>` is equivalent to `bitset<_Np>`
 *  - alignment of `simd_mask<_Tp, _Np>` is equal to the alignment of
 *    `bitset<_Np>`
 */

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_

#if __cplusplus >= 201703L

#include <array>

_GLIBCXX_SIMD_BEGIN_NAMESPACE
48 | // __simd_tuple_element {{{ | |
49 | template <size_t _I, typename _Tp> | |
50 | struct __simd_tuple_element; | |
51 | ||
52 | template <typename _Tp, typename _A0, typename... _As> | |
53 | struct __simd_tuple_element<0, _SimdTuple<_Tp, _A0, _As...>> | |
54 | { using type = simd<_Tp, _A0>; }; | |
55 | ||
56 | template <size_t _I, typename _Tp, typename _A0, typename... _As> | |
57 | struct __simd_tuple_element<_I, _SimdTuple<_Tp, _A0, _As...>> | |
b31186e5 | 58 | { using type = typename __simd_tuple_element<_I - 1, _SimdTuple<_Tp, _As...>>::type; }; |
2bcceb6f MK |
59 | |
60 | template <size_t _I, typename _Tp> | |
61 | using __simd_tuple_element_t = typename __simd_tuple_element<_I, _Tp>::type; | |
62 | ||
63 | // }}} | |
64 | // __simd_tuple_concat {{{ | |
65 | ||
66 | template <typename _Tp, typename... _A0s, typename... _A1s> | |
67 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0s..., _A1s...> | |
68 | __simd_tuple_concat(const _SimdTuple<_Tp, _A0s...>& __left, | |
69 | const _SimdTuple<_Tp, _A1s...>& __right) | |
70 | { | |
71 | if constexpr (sizeof...(_A0s) == 0) | |
72 | return __right; | |
73 | else if constexpr (sizeof...(_A1s) == 0) | |
74 | return __left; | |
75 | else | |
76 | return {__left.first, __simd_tuple_concat(__left.second, __right)}; | |
77 | } | |
78 | ||
79 | template <typename _Tp, typename _A10, typename... _A1s> | |
b31186e5 MK |
80 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, simd_abi::scalar, _A10, _A1s...> |
81 | __simd_tuple_concat(const _Tp& __left, const _SimdTuple<_Tp, _A10, _A1s...>& __right) | |
2bcceb6f MK |
82 | { return {__left, __right}; } |
83 | ||
84 | // }}} | |
85 | // __simd_tuple_pop_front {{{ | |
86 | // Returns the next _SimdTuple in __x that has _Np elements less. | |
87 | // Precondition: _Np must match the number of elements in __first (recursively) | |
88 | template <size_t _Np, typename _Tp> | |
89 | _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) | |
90 | __simd_tuple_pop_front(_Tp&& __x) | |
91 | { | |
92 | if constexpr (_Np == 0) | |
93 | return static_cast<_Tp&&>(__x); | |
94 | else | |
95 | { | |
96 | using _Up = __remove_cvref_t<_Tp>; | |
97 | static_assert(_Np >= _Up::_S_first_size); | |
98 | return __simd_tuple_pop_front<_Np - _Up::_S_first_size>(__x.second); | |
99 | } | |
100 | } | |
101 | ||
102 | // }}} | |
103 | // __get_simd_at<_Np> {{{1 | |
104 | struct __as_simd {}; | |
105 | ||
106 | struct __as_simd_tuple {}; | |
107 | ||
108 | template <typename _Tp, typename _A0, typename... _Abis> | |
109 | _GLIBCXX_SIMD_INTRINSIC constexpr simd<_Tp, _A0> | |
b31186e5 | 110 | __simd_tuple_get_impl(__as_simd, const _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) |
2bcceb6f MK |
111 | { return {__private_init, __t.first}; } |
112 | ||
113 | template <typename _Tp, typename _A0, typename... _Abis> | |
114 | _GLIBCXX_SIMD_INTRINSIC constexpr const auto& | |
b31186e5 | 115 | __simd_tuple_get_impl(__as_simd_tuple, const _SimdTuple<_Tp, _A0, _Abis...>& __t, |
2bcceb6f MK |
116 | _SizeConstant<0>) |
117 | { return __t.first; } | |
118 | ||
119 | template <typename _Tp, typename _A0, typename... _Abis> | |
120 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& | |
b31186e5 | 121 | __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>) |
2bcceb6f MK |
122 | { return __t.first; } |
123 | ||
124 | template <typename _R, size_t _Np, typename _Tp, typename... _Abis> | |
125 | _GLIBCXX_SIMD_INTRINSIC constexpr auto | |
b31186e5 | 126 | __simd_tuple_get_impl(_R, const _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) |
2bcceb6f MK |
127 | { return __simd_tuple_get_impl(_R(), __t.second, _SizeConstant<_Np - 1>()); } |
128 | ||
129 | template <size_t _Np, typename _Tp, typename... _Abis> | |
130 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& | |
b31186e5 MK |
131 | __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>) |
132 | { return __simd_tuple_get_impl(__as_simd_tuple(), __t.second, _SizeConstant<_Np - 1>()); } | |
2bcceb6f MK |
133 | |
134 | template <size_t _Np, typename _Tp, typename... _Abis> | |
135 | _GLIBCXX_SIMD_INTRINSIC constexpr auto | |
136 | __get_simd_at(const _SimdTuple<_Tp, _Abis...>& __t) | |
137 | { return __simd_tuple_get_impl(__as_simd(), __t, _SizeConstant<_Np>()); } | |
138 | ||
139 | // }}} | |
140 | // __get_tuple_at<_Np> {{{ | |
141 | template <size_t _Np, typename _Tp, typename... _Abis> | |
142 | _GLIBCXX_SIMD_INTRINSIC constexpr auto | |
143 | __get_tuple_at(const _SimdTuple<_Tp, _Abis...>& __t) | |
b31186e5 | 144 | { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } |
2bcceb6f MK |
145 | |
146 | template <size_t _Np, typename _Tp, typename... _Abis> | |
147 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& | |
148 | __get_tuple_at(_SimdTuple<_Tp, _Abis...>& __t) | |
b31186e5 | 149 | { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); } |
2bcceb6f MK |
150 | |
151 | // __tuple_element_meta {{{1 | |
152 | template <typename _Tp, typename _Abi, size_t _Offset> | |
153 | struct __tuple_element_meta : public _Abi::_SimdImpl | |
154 | { | |
155 | static_assert(is_same_v<typename _Abi::_SimdImpl::abi_type, | |
156 | _Abi>); // this fails e.g. when _SimdImpl is an | |
157 | // alias for _SimdImplBuiltin<_DifferentAbi> | |
158 | using value_type = _Tp; | |
159 | using abi_type = _Abi; | |
160 | using _Traits = _SimdTraits<_Tp, _Abi>; | |
161 | using _MaskImpl = typename _Abi::_MaskImpl; | |
162 | using _MaskMember = typename _Traits::_MaskMember; | |
163 | using simd_type = simd<_Tp, _Abi>; | |
164 | static constexpr size_t _S_offset = _Offset; | |
165 | static constexpr size_t _S_size() { return simd_size<_Tp, _Abi>::value; } | |
166 | static constexpr _MaskImpl _S_mask_impl = {}; | |
167 | ||
168 | template <size_t _Np, bool _Sanitized> | |
da579188 | 169 | _GLIBCXX_SIMD_INTRINSIC static constexpr auto |
2bcceb6f MK |
170 | _S_submask(_BitMask<_Np, _Sanitized> __bits) |
171 | { return __bits.template _M_extract<_Offset, _S_size()>(); } | |
172 | ||
173 | template <size_t _Np, bool _Sanitized> | |
da579188 | 174 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
2bcceb6f MK |
175 | _S_make_mask(_BitMask<_Np, _Sanitized> __bits) |
176 | { | |
177 | return _MaskImpl::template _S_convert<_Tp>( | |
178 | __bits.template _M_extract<_Offset, _S_size()>()._M_sanitized()); | |
179 | } | |
180 | ||
da579188 | 181 | _GLIBCXX_SIMD_INTRINSIC static constexpr _ULLong |
2bcceb6f MK |
182 | _S_mask_to_shifted_ullong(_MaskMember __k) |
183 | { return _MaskImpl::_S_to_bits(__k).to_ullong() << _Offset; } | |
184 | }; | |
185 | ||
186 | template <size_t _Offset, typename _Tp, typename _Abi, typename... _As> | |
da579188 | 187 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f MK |
188 | __tuple_element_meta<_Tp, _Abi, _Offset> |
189 | __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&) | |
190 | { return {}; } | |
191 | ||
192 | // }}}1 | |
193 | // _WithOffset wrapper class {{{ | |
194 | template <size_t _Offset, typename _Base> | |
195 | struct _WithOffset : public _Base | |
196 | { | |
197 | static inline constexpr size_t _S_offset = _Offset; | |
198 | ||
b31186e5 MK |
199 | _GLIBCXX_SIMD_INTRINSIC char* |
200 | _M_as_charptr() | |
201 | { return reinterpret_cast<char*>(this) + _S_offset * sizeof(typename _Base::value_type); } | |
2bcceb6f | 202 | |
b31186e5 MK |
203 | _GLIBCXX_SIMD_INTRINSIC const char* |
204 | _M_as_charptr() const | |
205 | { return reinterpret_cast<const char*>(this) + _S_offset * sizeof(typename _Base::value_type); } | |
2bcceb6f MK |
206 | }; |
207 | ||
208 | // make _WithOffset<_WithOffset> ill-formed to use: | |
209 | template <size_t _O0, size_t _O1, typename _Base> | |
210 | struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {}; | |
211 | ||
212 | template <size_t _Offset, typename _Tp> | |
52d28210 | 213 | _GLIBCXX_SIMD_INTRINSIC |
2bcceb6f MK |
214 | decltype(auto) |
215 | __add_offset(_Tp& __base) | |
216 | { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } | |
217 | ||
218 | template <size_t _Offset, typename _Tp> | |
52d28210 | 219 | _GLIBCXX_SIMD_INTRINSIC |
2bcceb6f MK |
220 | decltype(auto) |
221 | __add_offset(const _Tp& __base) | |
b31186e5 | 222 | { return static_cast<const _WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); } |
2bcceb6f MK |
223 | |
224 | template <size_t _Offset, size_t _ExistingOffset, typename _Tp> | |
52d28210 | 225 | _GLIBCXX_SIMD_INTRINSIC |
2bcceb6f MK |
226 | decltype(auto) |
227 | __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base) | |
b31186e5 | 228 | { return static_cast<_WithOffset<_Offset + _ExistingOffset, _Tp>&>(static_cast<_Tp&>(__base)); } |
2bcceb6f MK |
229 | |
230 | template <size_t _Offset, size_t _ExistingOffset, typename _Tp> | |
52d28210 | 231 | _GLIBCXX_SIMD_INTRINSIC |
2bcceb6f MK |
232 | decltype(auto) |
233 | __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base) | |
234 | { | |
235 | return static_cast<const _WithOffset<_Offset + _ExistingOffset, _Tp>&>( | |
236 | static_cast<const _Tp&>(__base)); | |
237 | } | |
238 | ||
239 | template <typename _Tp> | |
240 | constexpr inline size_t __offset = 0; | |
241 | ||
242 | template <size_t _Offset, typename _Tp> | |
243 | constexpr inline size_t __offset<_WithOffset<_Offset, _Tp>> | |
244 | = _WithOffset<_Offset, _Tp>::_S_offset; | |
245 | ||
246 | template <typename _Tp> | |
247 | constexpr inline size_t __offset<const _Tp> = __offset<_Tp>; | |
248 | ||
249 | template <typename _Tp> | |
250 | constexpr inline size_t __offset<_Tp&> = __offset<_Tp>; | |
251 | ||
252 | template <typename _Tp> | |
253 | constexpr inline size_t __offset<_Tp&&> = __offset<_Tp>; | |
254 | ||
255 | // }}} | |
256 | // _SimdTuple specializations {{{1 | |
257 | // empty {{{2 | |
258 | template <typename _Tp> | |
259 | struct _SimdTuple<_Tp> | |
260 | { | |
261 | using value_type = _Tp; | |
262 | static constexpr size_t _S_tuple_size = 0; | |
263 | static constexpr size_t _S_size() { return 0; } | |
264 | }; | |
265 | ||
266 | // _SimdTupleData {{{2 | |
267 | template <typename _FirstType, typename _SecondType> | |
268 | struct _SimdTupleData | |
269 | { | |
270 | _FirstType first; | |
271 | _SecondType second; | |
272 | ||
273 | _GLIBCXX_SIMD_INTRINSIC | |
b31186e5 MK |
274 | constexpr bool |
275 | _M_is_constprop() const | |
2bcceb6f MK |
276 | { |
277 | if constexpr (is_class_v<_FirstType>) | |
278 | return first._M_is_constprop() && second._M_is_constprop(); | |
279 | else | |
280 | return __builtin_constant_p(first) && second._M_is_constprop(); | |
281 | } | |
282 | }; | |
283 | ||
284 | template <typename _FirstType, typename _Tp> | |
285 | struct _SimdTupleData<_FirstType, _SimdTuple<_Tp>> | |
286 | { | |
287 | _FirstType first; | |
288 | static constexpr _SimdTuple<_Tp> second = {}; | |
289 | ||
290 | _GLIBCXX_SIMD_INTRINSIC | |
b31186e5 MK |
291 | constexpr bool |
292 | _M_is_constprop() const | |
2bcceb6f MK |
293 | { |
294 | if constexpr (is_class_v<_FirstType>) | |
295 | return first._M_is_constprop(); | |
296 | else | |
297 | return __builtin_constant_p(first); | |
298 | } | |
299 | }; | |
300 | ||
301 | // 1 or more {{{2 | |
302 | template <typename _Tp, typename _Abi0, typename... _Abis> | |
303 | struct _SimdTuple<_Tp, _Abi0, _Abis...> | |
304 | : _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember, | |
305 | _SimdTuple<_Tp, _Abis...>> | |
306 | { | |
307 | static_assert(!__is_fixed_size_abi_v<_Abi0>); | |
308 | using value_type = _Tp; | |
309 | using _FirstType = typename _SimdTraits<_Tp, _Abi0>::_SimdMember; | |
310 | using _FirstAbi = _Abi0; | |
311 | using _SecondType = _SimdTuple<_Tp, _Abis...>; | |
312 | static constexpr size_t _S_tuple_size = sizeof...(_Abis) + 1; | |
313 | ||
314 | static constexpr size_t _S_size() | |
315 | { return simd_size_v<_Tp, _Abi0> + _SecondType::_S_size(); } | |
316 | ||
317 | static constexpr size_t _S_first_size = simd_size_v<_Tp, _Abi0>; | |
318 | static constexpr bool _S_is_homogeneous = (is_same_v<_Abi0, _Abis> && ...); | |
319 | ||
320 | using _Base = _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember, | |
321 | _SimdTuple<_Tp, _Abis...>>; | |
322 | using _Base::first; | |
323 | using _Base::second; | |
324 | ||
325 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple() = default; | |
326 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(const _SimdTuple&) = default; | |
327 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple& operator=(const _SimdTuple&) | |
328 | = default; | |
329 | ||
330 | template <typename _Up> | |
b31186e5 MK |
331 | _GLIBCXX_SIMD_INTRINSIC constexpr |
332 | _SimdTuple(_Up&& __x) | |
2bcceb6f MK |
333 | : _Base{static_cast<_Up&&>(__x)} {} |
334 | ||
335 | template <typename _Up, typename _Up2> | |
b31186e5 MK |
336 | _GLIBCXX_SIMD_INTRINSIC constexpr |
337 | _SimdTuple(_Up&& __x, _Up2&& __y) | |
2bcceb6f MK |
338 | : _Base{static_cast<_Up&&>(__x), static_cast<_Up2&&>(__y)} {} |
339 | ||
340 | template <typename _Up> | |
b31186e5 MK |
341 | _GLIBCXX_SIMD_INTRINSIC constexpr |
342 | _SimdTuple(_Up&& __x, _SimdTuple<_Tp>) | |
2bcceb6f MK |
343 | : _Base{static_cast<_Up&&>(__x)} {} |
344 | ||
b31186e5 MK |
345 | _GLIBCXX_SIMD_INTRINSIC char* |
346 | _M_as_charptr() | |
2bcceb6f MK |
347 | { return reinterpret_cast<char*>(this); } |
348 | ||
b31186e5 MK |
349 | _GLIBCXX_SIMD_INTRINSIC const char* |
350 | _M_as_charptr() const | |
2bcceb6f MK |
351 | { return reinterpret_cast<const char*>(this); } |
352 | ||
353 | template <size_t _Np> | |
b31186e5 MK |
354 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& |
355 | _M_at() | |
2bcceb6f MK |
356 | { |
357 | if constexpr (_Np == 0) | |
358 | return first; | |
359 | else | |
360 | return second.template _M_at<_Np - 1>(); | |
361 | } | |
362 | ||
363 | template <size_t _Np> | |
b31186e5 MK |
364 | _GLIBCXX_SIMD_INTRINSIC constexpr const auto& |
365 | _M_at() const | |
2bcceb6f MK |
366 | { |
367 | if constexpr (_Np == 0) | |
368 | return first; | |
369 | else | |
370 | return second.template _M_at<_Np - 1>(); | |
371 | } | |
372 | ||
373 | template <size_t _Np> | |
b31186e5 MK |
374 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
375 | _M_simd_at() const | |
2bcceb6f MK |
376 | { |
377 | if constexpr (_Np == 0) | |
378 | return simd<_Tp, _Abi0>(__private_init, first); | |
379 | else | |
380 | return second.template _M_simd_at<_Np - 1>(); | |
381 | } | |
382 | ||
383 | template <size_t _Offset = 0, typename _Fp> | |
384 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple | |
385 | _S_generate(_Fp&& __gen, _SizeConstant<_Offset> = {}) | |
386 | { | |
387 | auto&& __first = __gen(__tuple_element_meta<_Tp, _Abi0, _Offset>()); | |
388 | if constexpr (_S_tuple_size == 1) | |
389 | return {__first}; | |
390 | else | |
391 | return {__first, | |
392 | _SecondType::_S_generate( | |
393 | static_cast<_Fp&&>(__gen), | |
394 | _SizeConstant<_Offset + simd_size_v<_Tp, _Abi0>>())}; | |
395 | } | |
396 | ||
397 | template <size_t _Offset = 0, typename _Fp, typename... _More> | |
398 | _GLIBCXX_SIMD_INTRINSIC _SimdTuple | |
399 | _M_apply_wrapped(_Fp&& __fun, const _More&... __more) const | |
400 | { | |
401 | auto&& __first | |
402 | = __fun(__make_meta<_Offset>(*this), first, __more.first...); | |
403 | if constexpr (_S_tuple_size == 1) | |
404 | return {__first}; | |
405 | else | |
406 | return { | |
407 | __first, | |
408 | second.template _M_apply_wrapped<_Offset + simd_size_v<_Tp, _Abi0>>( | |
409 | static_cast<_Fp&&>(__fun), __more.second...)}; | |
410 | } | |
411 | ||
412 | template <typename _Tup> | |
413 | _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto) | |
414 | _M_extract_argument(_Tup&& __tup) const | |
415 | { | |
416 | using _TupT = typename __remove_cvref_t<_Tup>::value_type; | |
417 | if constexpr (is_same_v<_SimdTuple, __remove_cvref_t<_Tup>>) | |
418 | return __tup.first; | |
419 | else if (__builtin_is_constant_evaluated()) | |
53b55701 MK |
420 | return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate( |
421 | [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
422 | return __meta._S_generator( | |
423 | [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
424 | return __tup[__i]; | |
425 | }, static_cast<_TupT*>(nullptr)); | |
2bcceb6f MK |
426 | }); |
427 | else | |
53b55701 | 428 | return [&]() { // not always_inline; allow the compiler to decide |
2bcceb6f MK |
429 | __fixed_size_storage_t<_TupT, _S_first_size> __r; |
430 | __builtin_memcpy(__r._M_as_charptr(), __tup._M_as_charptr(), | |
431 | sizeof(__r)); | |
432 | return __r; | |
433 | }(); | |
434 | } | |
435 | ||
436 | template <typename _Tup> | |
437 | _GLIBCXX_SIMD_INTRINSIC constexpr auto& | |
438 | _M_skip_argument(_Tup&& __tup) const | |
439 | { | |
440 | static_assert(_S_tuple_size > 1); | |
441 | using _Up = __remove_cvref_t<_Tup>; | |
442 | constexpr size_t __off = __offset<_Up>; | |
443 | if constexpr (_S_first_size == _Up::_S_first_size && __off == 0) | |
444 | return __tup.second; | |
445 | else if constexpr (_S_first_size > _Up::_S_first_size | |
446 | && _S_first_size % _Up::_S_first_size == 0 | |
447 | && __off == 0) | |
448 | return __simd_tuple_pop_front<_S_first_size>(__tup); | |
449 | else if constexpr (_S_first_size + __off < _Up::_S_first_size) | |
450 | return __add_offset<_S_first_size>(__tup); | |
451 | else if constexpr (_S_first_size + __off == _Up::_S_first_size) | |
452 | return __tup.second; | |
453 | else | |
454 | __assert_unreachable<_Tup>(); | |
455 | } | |
456 | ||
457 | template <size_t _Offset, typename... _More> | |
458 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
459 | _M_assign_front(const _SimdTuple<_Tp, _Abi0, _More...>& __x) & | |
460 | { | |
461 | static_assert(_Offset == 0); | |
462 | first = __x.first; | |
463 | if constexpr (sizeof...(_More) > 0) | |
464 | { | |
465 | static_assert(sizeof...(_Abis) >= sizeof...(_More)); | |
466 | second.template _M_assign_front<0>(__x.second); | |
467 | } | |
468 | } | |
469 | ||
470 | template <size_t _Offset> | |
471 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
472 | _M_assign_front(const _FirstType& __x) & | |
473 | { | |
474 | static_assert(_Offset == 0); | |
475 | first = __x; | |
476 | } | |
477 | ||
478 | template <size_t _Offset, typename... _As> | |
479 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
480 | _M_assign_front(const _SimdTuple<_Tp, _As...>& __x) & | |
481 | { | |
482 | __builtin_memcpy(_M_as_charptr() + _Offset * sizeof(value_type), | |
483 | __x._M_as_charptr(), | |
484 | sizeof(_Tp) * _SimdTuple<_Tp, _As...>::_S_size()); | |
485 | } | |
486 | ||
487 | /* | |
488 | * Iterate over the first objects in this _SimdTuple and call __fun for each | |
489 | * of them. If additional arguments are passed via __more, chunk them into | |
490 | * _SimdTuple or __vector_type_t objects of the same number of values. | |
491 | */ | |
492 | template <typename _Fp, typename... _More> | |
493 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple | |
494 | _M_apply_per_chunk(_Fp&& __fun, _More&&... __more) const | |
495 | { | |
496 | if constexpr ((... | |
497 | || conjunction_v< | |
498 | is_lvalue_reference<_More>, | |
499 | negation<is_const<remove_reference_t<_More>>>>) ) | |
500 | { | |
501 | // need to write back at least one of __more after calling __fun | |
53b55701 | 502 | auto&& __first = [&](auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
503 | auto __r = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, |
504 | __args...); | |
505 | [[maybe_unused]] auto&& __ignore_me = {( | |
53b55701 | 506 | [](auto&& __dst, const auto& __src) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
507 | if constexpr (is_assignable_v<decltype(__dst), |
508 | decltype(__dst)>) | |
509 | { | |
510 | __dst.template _M_assign_front<__offset<decltype(__dst)>>( | |
511 | __src); | |
512 | } | |
513 | }(static_cast<_More&&>(__more), __args), | |
514 | 0)...}; | |
515 | return __r; | |
53b55701 | 516 | }(_M_extract_argument(__more)...); |
2bcceb6f MK |
517 | if constexpr (_S_tuple_size == 1) |
518 | return {__first}; | |
519 | else | |
520 | return {__first, | |
521 | second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), | |
522 | _M_skip_argument(__more)...)}; | |
523 | } | |
524 | else | |
525 | { | |
526 | auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, | |
527 | _M_extract_argument(__more)...); | |
528 | if constexpr (_S_tuple_size == 1) | |
529 | return {__first}; | |
530 | else | |
531 | return {__first, | |
532 | second._M_apply_per_chunk(static_cast<_Fp&&>(__fun), | |
533 | _M_skip_argument(__more)...)}; | |
534 | } | |
535 | } | |
536 | ||
537 | template <typename _R = _Tp, typename _Fp, typename... _More> | |
da579188 | 538 | _GLIBCXX_SIMD_INTRINSIC constexpr auto |
b31186e5 | 539 | _M_apply_r(_Fp&& __fun, const _More&... __more) const |
2bcceb6f MK |
540 | { |
541 | auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first, | |
542 | __more.first...); | |
543 | if constexpr (_S_tuple_size == 1) | |
544 | return __first; | |
545 | else | |
546 | return __simd_tuple_concat<_R>( | |
547 | __first, second.template _M_apply_r<_R>(static_cast<_Fp&&>(__fun), | |
548 | __more.second...)); | |
549 | } | |
550 | ||
551 | template <typename _Fp, typename... _More> | |
552 | _GLIBCXX_SIMD_INTRINSIC constexpr friend _SanitizedBitMask<_S_size()> | |
553 | _M_test(const _Fp& __fun, const _SimdTuple& __x, const _More&... __more) | |
554 | { | |
555 | const _SanitizedBitMask<_S_first_size> __first | |
556 | = _Abi0::_MaskImpl::_S_to_bits( | |
557 | __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), __x.first, | |
558 | __more.first...)); | |
559 | if constexpr (_S_tuple_size == 1) | |
560 | return __first; | |
561 | else | |
562 | return _M_test(__fun, __x.second, __more.second...) | |
563 | ._M_prepend(__first); | |
564 | } | |
565 | ||
566 | template <typename _Up, _Up _I> | |
567 | _GLIBCXX_SIMD_INTRINSIC constexpr _Tp | |
568 | operator[](integral_constant<_Up, _I>) const noexcept | |
569 | { | |
570 | if constexpr (_I < simd_size_v<_Tp, _Abi0>) | |
571 | return _M_subscript_read(_I); | |
572 | else | |
573 | return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()]; | |
574 | } | |
575 | ||
da579188 | 576 | _GLIBCXX_SIMD_INTRINSIC constexpr _Tp |
b31186e5 | 577 | operator[](size_t __i) const noexcept |
2bcceb6f MK |
578 | { |
579 | if constexpr (_S_tuple_size == 1) | |
580 | return _M_subscript_read(__i); | |
2bcceb6f | 581 | #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS |
da579188 MK |
582 | else if (not __builtin_is_constant_evaluated()) |
583 | return reinterpret_cast<const __may_alias<_Tp>*>(this)[__i]; | |
2bcceb6f | 584 | #endif |
da579188 MK |
585 | else if constexpr (__is_scalar_abi<_Abi0>()) |
586 | { | |
587 | const _Tp* ptr = &first; | |
588 | return ptr[__i]; | |
2bcceb6f | 589 | } |
da579188 MK |
590 | else |
591 | return __i < simd_size_v<_Tp, _Abi0> ? _M_subscript_read(__i) | |
592 | : second[__i - simd_size_v<_Tp, _Abi0>]; | |
2bcceb6f MK |
593 | } |
594 | ||
da579188 | 595 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
b31186e5 | 596 | _M_set(size_t __i, _Tp __val) noexcept |
2bcceb6f MK |
597 | { |
598 | if constexpr (_S_tuple_size == 1) | |
599 | return _M_subscript_write(__i, __val); | |
2bcceb6f | 600 | #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS |
da579188 MK |
601 | else if (not __builtin_is_constant_evaluated()) |
602 | reinterpret_cast<__may_alias<_Tp>*>(this)[__i] = __val; | |
2bcceb6f | 603 | #endif |
da579188 MK |
604 | else if (__i < simd_size_v<_Tp, _Abi0>) |
605 | _M_subscript_write(__i, __val); | |
606 | else | |
607 | second._M_set(__i - simd_size_v<_Tp, _Abi0>, __val); | |
2bcceb6f MK |
608 | } |
609 | ||
610 | private: | |
611 | // _M_subscript_read/_write {{{ | |
da579188 | 612 | _GLIBCXX_SIMD_INTRINSIC constexpr _Tp |
b31186e5 | 613 | _M_subscript_read([[maybe_unused]] size_t __i) const noexcept |
2bcceb6f MK |
614 | { |
615 | if constexpr (__is_vectorizable_v<_FirstType>) | |
616 | return first; | |
617 | else | |
618 | return first[__i]; | |
619 | } | |
620 | ||
da579188 | 621 | _GLIBCXX_SIMD_INTRINSIC constexpr void |
b31186e5 | 622 | _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept |
2bcceb6f MK |
623 | { |
624 | if constexpr (__is_vectorizable_v<_FirstType>) | |
625 | first = __y; | |
626 | else | |
627 | first._M_set(__i, __y); | |
628 | } | |
629 | ||
630 | // }}} | |
631 | }; | |
632 | ||
633 | // __make_simd_tuple {{{1 | |
634 | template <typename _Tp, typename _A0> | |
da579188 | 635 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> |
2bcceb6f MK |
636 | __make_simd_tuple(simd<_Tp, _A0> __x0) |
637 | { return {__data(__x0)}; } | |
638 | ||
639 | template <typename _Tp, typename _A0, typename... _As> | |
da579188 | 640 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _As...> |
2bcceb6f MK |
641 | __make_simd_tuple(const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs) |
642 | { return {__data(__x0), __make_simd_tuple(__xs...)}; } | |
643 | ||
644 | template <typename _Tp, typename _A0> | |
da579188 | 645 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0> |
2bcceb6f MK |
646 | __make_simd_tuple(const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0) |
647 | { return {__arg0}; } | |
648 | ||
649 | template <typename _Tp, typename _A0, typename _A1, typename... _Abis> | |
da579188 | 650 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _A1, _Abis...> |
2bcceb6f MK |
651 | __make_simd_tuple( |
652 | const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0, | |
653 | const typename _SimdTraits<_Tp, _A1>::_SimdMember& __arg1, | |
654 | const typename _SimdTraits<_Tp, _Abis>::_SimdMember&... __args) | |
655 | { return {__arg0, __make_simd_tuple<_Tp, _A1, _Abis...>(__arg1, __args...)}; } | |
656 | ||
657 | // __to_simd_tuple {{{1 | |
658 | template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX> | |
659 | _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> | |
660 | __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX); | |
661 | ||
662 | template <typename _Tp, size_t _Np, | |
663 | size_t _Offset = 0, // skip this many elements in __from0 | |
664 | typename _R = __fixed_size_storage_t<_Tp, _Np>, typename _V0, | |
665 | typename _V0VT = _VectorTraits<_V0>, typename... _VX> | |
b31186e5 | 666 | _GLIBCXX_SIMD_INTRINSIC _R constexpr __to_simd_tuple(const _V0 __from0, const _VX... __fromX) |
2bcceb6f MK |
667 | { |
668 | static_assert(is_same_v<typename _V0VT::value_type, _Tp>); | |
669 | static_assert(_Offset < _V0VT::_S_full_size); | |
670 | using _R0 = __vector_type_t<_Tp, _R::_S_first_size>; | |
671 | if constexpr (_R::_S_tuple_size == 1) | |
672 | { | |
673 | if constexpr (_Np == 1) | |
674 | return _R{__from0[_Offset]}; | |
675 | else if constexpr (_Offset == 0 && _V0VT::_S_full_size >= _Np) | |
676 | return _R{__intrin_bitcast<_R0>(__from0)}; | |
677 | else if constexpr (_Offset * 2 == _V0VT::_S_full_size | |
678 | && _V0VT::_S_full_size / 2 >= _Np) | |
679 | return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0))}; | |
680 | else if constexpr (_Offset * 4 == _V0VT::_S_full_size | |
681 | && _V0VT::_S_full_size / 4 >= _Np) | |
682 | return _R{__intrin_bitcast<_R0>(__extract_part<1, 4>(__from0))}; | |
683 | else | |
684 | __assert_unreachable<_Tp>(); | |
685 | } | |
686 | else | |
687 | { | |
688 | if constexpr (1 == _R::_S_first_size) | |
689 | { // extract one scalar and recurse | |
690 | if constexpr (_Offset + 1 < _V0VT::_S_full_size) | |
691 | return _R{__from0[_Offset], | |
692 | __to_simd_tuple<_Tp, _Np - 1, _Offset + 1>(__from0, | |
693 | __fromX...)}; | |
694 | else | |
695 | return _R{__from0[_Offset], | |
696 | __to_simd_tuple<_Tp, _Np - 1, 0>(__fromX...)}; | |
697 | } | |
698 | ||
699 | // place __from0 into _R::first and recurse for __fromX -> _R::second | |
700 | else if constexpr (_V0VT::_S_full_size == _R::_S_first_size | |
701 | && _Offset == 0) | |
702 | return _R{__from0, | |
703 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size>(__fromX...)}; | |
704 | ||
705 | // place lower part of __from0 into _R::first and recurse with _Offset | |
706 | else if constexpr (_V0VT::_S_full_size > _R::_S_first_size | |
707 | && _Offset == 0) | |
708 | return _R{__intrin_bitcast<_R0>(__from0), | |
709 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size, | |
710 | _R::_S_first_size>(__from0, __fromX...)}; | |
711 | ||
712 | // place lower part of second quarter of __from0 into _R::first and | |
713 | // recurse with _Offset | |
714 | else if constexpr (_Offset * 4 == _V0VT::_S_full_size | |
715 | && _V0VT::_S_full_size >= 4 * _R::_S_first_size) | |
716 | return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), | |
717 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size, | |
718 | _Offset + _R::_S_first_size>(__from0, | |
719 | __fromX...)}; | |
720 | ||
721 | // place lower half of high half of __from0 into _R::first and recurse | |
722 | // with _Offset | |
723 | else if constexpr (_Offset * 2 == _V0VT::_S_full_size | |
724 | && _V0VT::_S_full_size >= 4 * _R::_S_first_size) | |
725 | return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)), | |
726 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size, | |
727 | _Offset + _R::_S_first_size>(__from0, | |
728 | __fromX...)}; | |
729 | ||
730 | // place high half of __from0 into _R::first and recurse with __fromX | |
731 | else if constexpr (_Offset * 2 == _V0VT::_S_full_size | |
732 | && _V0VT::_S_full_size / 2 >= _R::_S_first_size) | |
733 | return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0)), | |
734 | __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 0>( | |
735 | __fromX...)}; | |
736 | ||
737 | // ill-formed if some unforseen pattern is needed | |
738 | else | |
739 | __assert_unreachable<_Tp>(); | |
740 | } | |
741 | } | |
742 | ||
743 | template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX> | |
744 | _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np> | |
745 | __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX) | |
746 | { | |
747 | if constexpr (is_same_v<_Tp, _V>) | |
748 | { | |
749 | static_assert( | |
750 | sizeof...(_VX) == 0, | |
751 | "An array of scalars must be the last argument to __to_simd_tuple"); | |
752 | return __call_with_subscripts( | |
53b55701 MK |
753 | __from, make_index_sequence<_NV>(), |
754 | [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
755 | return __simd_tuple_concat( | |
756 | _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>()); | |
757 | }); | |
2bcceb6f MK |
758 | } |
759 | else | |
760 | return __call_with_subscripts( | |
53b55701 MK |
761 | __from, make_index_sequence<_NV>(), |
762 | [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
763 | return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...); | |
764 | }); | |
2bcceb6f MK |
765 | } |
766 | ||
767 | template <size_t, typename _Tp> | |
768 | using __to_tuple_helper = _Tp; | |
769 | ||
770 | template <typename _Tp, typename _A0, size_t _NOut, size_t _Np, | |
771 | size_t... _Indexes> | |
772 | _GLIBCXX_SIMD_INTRINSIC __fixed_size_storage_t<_Tp, _NOut> | |
773 | __to_simd_tuple_impl(index_sequence<_Indexes...>, | |
774 | const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) | |
775 | { | |
776 | return __make_simd_tuple<_Tp, __to_tuple_helper<_Indexes, _A0>...>( | |
777 | __args[_Indexes]...); | |
778 | } | |
779 | ||
780 | template <typename _Tp, typename _A0, size_t _NOut, size_t _Np, | |
781 | typename _R = __fixed_size_storage_t<_Tp, _NOut>> | |
782 | _GLIBCXX_SIMD_INTRINSIC _R | |
783 | __to_simd_tuple_sized( | |
784 | const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args) | |
785 | { | |
786 | static_assert(_Np * simd_size_v<_Tp, _A0> >= _NOut); | |
787 | return __to_simd_tuple_impl<_Tp, _A0, _NOut>( | |
788 | make_index_sequence<_R::_S_tuple_size>(), __args); | |
789 | } | |
790 | ||
791 | // __optimize_simd_tuple {{{1 | |
792 | template <typename _Tp> | |
da579188 | 793 | _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp> |
2bcceb6f MK |
794 | __optimize_simd_tuple(const _SimdTuple<_Tp>) |
795 | { return {}; } | |
796 | ||
797 | template <typename _Tp, typename _Ap> | |
da579188 | 798 | _GLIBCXX_SIMD_INTRINSIC constexpr const _SimdTuple<_Tp, _Ap>& |
2bcceb6f MK |
799 | __optimize_simd_tuple(const _SimdTuple<_Tp, _Ap>& __x) |
800 | { return __x; } | |
801 | ||
802 | template <typename _Tp, typename _A0, typename _A1, typename... _Abis, | |
803 | typename _R = __fixed_size_storage_t< | |
804 | _Tp, _SimdTuple<_Tp, _A0, _A1, _Abis...>::_S_size()>> | |
da579188 | 805 | _GLIBCXX_SIMD_INTRINSIC constexpr _R |
2bcceb6f MK |
806 | __optimize_simd_tuple(const _SimdTuple<_Tp, _A0, _A1, _Abis...>& __x) |
807 | { | |
808 | using _Tup = _SimdTuple<_Tp, _A0, _A1, _Abis...>; | |
809 | if constexpr (is_same_v<_R, _Tup>) | |
810 | return __x; | |
811 | else if constexpr (is_same_v<typename _R::_FirstType, | |
812 | typename _Tup::_FirstType>) | |
813 | return {__x.first, __optimize_simd_tuple(__x.second)}; | |
814 | else if constexpr (__is_scalar_abi<_A0>() | |
815 | || _A0::template _S_is_partial<_Tp>) | |
816 | return {__generate_from_n_evaluations<_R::_S_first_size, | |
817 | typename _R::_FirstType>( | |
53b55701 | 818 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }), |
2bcceb6f MK |
819 | __optimize_simd_tuple( |
820 | __simd_tuple_pop_front<_R::_S_first_size>(__x))}; | |
821 | else if constexpr (is_same_v<_A0, _A1> | |
822 | && _R::_S_first_size == simd_size_v<_Tp, _A0> + simd_size_v<_Tp, _A1>) | |
823 | return {__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), | |
824 | __optimize_simd_tuple(__x.second.second)}; | |
825 | else if constexpr (sizeof...(_Abis) >= 2 | |
826 | && _R::_S_first_size == (4 * simd_size_v<_Tp, _A0>) | |
827 | && simd_size_v<_Tp, _A0> == __simd_tuple_element_t< | |
828 | (sizeof...(_Abis) >= 2 ? 3 : 0), _Tup>::size()) | |
829 | return { | |
830 | __concat(__concat(__x.template _M_at<0>(), __x.template _M_at<1>()), | |
831 | __concat(__x.template _M_at<2>(), __x.template _M_at<3>())), | |
832 | __optimize_simd_tuple(__x.second.second.second.second)}; | |
833 | else | |
834 | { | |
835 | static_assert(sizeof(_R) == sizeof(__x)); | |
836 | _R __r; | |
837 | __builtin_memcpy(__r._M_as_charptr(), __x._M_as_charptr(), | |
838 | sizeof(_Tp) * _R::_S_size()); | |
839 | return __r; | |
840 | } | |
841 | } | |
842 | ||
843 | // __for_each(const _SimdTuple &, Fun) {{{1 | |
844 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> | |
845 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
846 | __for_each(const _SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) | |
847 | { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } | |
848 | ||
849 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, | |
850 | typename... _As, typename _Fp> | |
851 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
852 | __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) | |
853 | { | |
854 | __fun(__make_meta<_Offset>(__t), __t.first); | |
855 | __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, | |
856 | static_cast<_Fp&&>(__fun)); | |
857 | } | |
858 | ||
859 | // __for_each(_SimdTuple &, Fun) {{{1 | |
860 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> | |
861 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
862 | __for_each(_SimdTuple<_Tp, _A0>& __t, _Fp&& __fun) | |
863 | { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); } | |
864 | ||
865 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, | |
866 | typename... _As, typename _Fp> | |
867 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
868 | __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun) | |
869 | { | |
870 | __fun(__make_meta<_Offset>(__t), __t.first); | |
871 | __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second, | |
872 | static_cast<_Fp&&>(__fun)); | |
873 | } | |
874 | ||
875 | // __for_each(_SimdTuple &, const _SimdTuple &, Fun) {{{1 | |
876 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> | |
877 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
b31186e5 MK |
878 | __for_each(_SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) |
879 | { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } | |
2bcceb6f MK |
880 | |
881 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, | |
882 | typename... _As, typename _Fp> | |
883 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
884 | __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __a, | |
885 | const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) | |
886 | { | |
887 | __fun(__make_meta<_Offset>(__a), __a.first, __b.first); | |
888 | __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, | |
889 | static_cast<_Fp&&>(__fun)); | |
890 | } | |
891 | ||
892 | // __for_each(const _SimdTuple &, const _SimdTuple &, Fun) {{{1 | |
893 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp> | |
894 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
b31186e5 MK |
895 | __for_each(const _SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun) |
896 | { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); } | |
2bcceb6f MK |
897 | |
898 | template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1, | |
899 | typename... _As, typename _Fp> | |
900 | _GLIBCXX_SIMD_INTRINSIC constexpr void | |
901 | __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __a, | |
902 | const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun) | |
903 | { | |
904 | __fun(__make_meta<_Offset>(__a), __a.first, __b.first); | |
905 | __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second, | |
906 | static_cast<_Fp&&>(__fun)); | |
907 | } | |
908 | ||
909 | // }}}1 | |
910 | // __extract_part(_SimdTuple) {{{ | |
b31186e5 | 911 | template <int _Index, int _Total, int _Combine, typename _Tp, typename _A0, typename... _As> |
da579188 | 912 | _GLIBCXX_SIMD_INTRINSIC constexpr auto // __vector_type_t or _SimdTuple |
2bcceb6f MK |
913 | __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x) |
914 | { | |
915 | // worst cases: | |
916 | // (a) 4, 4, 4 => 3, 3, 3, 3 (_Total = 4) | |
917 | // (b) 2, 2, 2 => 3, 3 (_Total = 2) | |
918 | // (c) 4, 2 => 2, 2, 2 (_Total = 3) | |
919 | using _Tuple = _SimdTuple<_Tp, _A0, _As...>; | |
920 | static_assert(_Index + _Combine <= _Total && _Index >= 0 && _Total >= 1); | |
921 | constexpr size_t _Np = _Tuple::_S_size(); | |
922 | static_assert(_Np >= _Total && _Np % _Total == 0); | |
923 | constexpr size_t __values_per_part = _Np / _Total; | |
924 | [[maybe_unused]] constexpr size_t __values_to_skip | |
925 | = _Index * __values_per_part; | |
926 | constexpr size_t __return_size = __values_per_part * _Combine; | |
927 | using _RetAbi = simd_abi::deduce_t<_Tp, __return_size>; | |
928 | ||
929 | // handle (optimize) the simple cases | |
930 | if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size) | |
931 | return __x.first._M_data; | |
932 | else if constexpr (_Index == 0 && _Total == _Combine) | |
933 | return __x; | |
934 | else if constexpr (_Index == 0 && _Tuple::_S_first_size >= __return_size) | |
935 | return __intrin_bitcast<__vector_type_t<_Tp, __return_size>>( | |
936 | __as_vector(__x.first)); | |
937 | ||
938 | // recurse to skip unused data members at the beginning of _SimdTuple | |
939 | else if constexpr (__values_to_skip >= _Tuple::_S_first_size) | |
940 | { // recurse | |
941 | if constexpr (_Tuple::_S_first_size % __values_per_part == 0) | |
942 | { | |
943 | constexpr int __parts_in_first | |
944 | = _Tuple::_S_first_size / __values_per_part; | |
945 | return __extract_part<_Index - __parts_in_first, | |
946 | _Total - __parts_in_first, _Combine>( | |
947 | __x.second); | |
948 | } | |
949 | else | |
950 | return __extract_part<__values_to_skip - _Tuple::_S_first_size, | |
951 | _Np - _Tuple::_S_first_size, __return_size>( | |
952 | __x.second); | |
953 | } | |
954 | ||
955 | // extract from multiple _SimdTuple data members | |
956 | else if constexpr (__return_size > _Tuple::_S_first_size - __values_to_skip) | |
957 | { | |
958 | #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS | |
959 | const __may_alias<_Tp>* const element_ptr | |
960 | = reinterpret_cast<const __may_alias<_Tp>*>(&__x) + __values_to_skip; | |
961 | return __as_vector(simd<_Tp, _RetAbi>(element_ptr, element_aligned)); | |
962 | #else | |
963 | [[maybe_unused]] constexpr size_t __offset = __values_to_skip; | |
53b55701 MK |
964 | return __as_vector(simd<_Tp, _RetAbi>( |
965 | [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
966 | constexpr _SizeConstant<__i + __offset> __k; | |
967 | return __x[__k]; | |
968 | })); | |
2bcceb6f MK |
969 | #endif |
970 | } | |
971 | ||
972 | // all of the return values are in __x.first | |
973 | else if constexpr (_Tuple::_S_first_size % __values_per_part == 0) | |
974 | return __extract_part<_Index, _Tuple::_S_first_size / __values_per_part, | |
975 | _Combine>(__x.first); | |
976 | else | |
977 | return __extract_part<__values_to_skip, _Tuple::_S_first_size, | |
978 | _Combine * __values_per_part>(__x.first); | |
979 | } | |
980 | ||
981 | // }}} | |
982 | // __fixed_size_storage_t<_Tp, _Np>{{{ | |
983 | template <typename _Tp, int _Np, typename _Tuple, | |
984 | typename _Next = simd<_Tp, _AllNativeAbis::_BestAbi<_Tp, _Np>>, | |
985 | int _Remain = _Np - int(_Next::size())> | |
986 | struct __fixed_size_storage_builder; | |
987 | ||
988 | template <typename _Tp, int _Np> | |
989 | struct __fixed_size_storage | |
990 | : public __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp>> {}; | |
991 | ||
992 | template <typename _Tp, int _Np, typename... _As, typename _Next> | |
993 | struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, | |
994 | 0> | |
995 | { using type = _SimdTuple<_Tp, _As..., typename _Next::abi_type>; }; | |
996 | ||
997 | template <typename _Tp, int _Np, typename... _As, typename _Next, int _Remain> | |
998 | struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next, | |
999 | _Remain> | |
1000 | { | |
1001 | using type = typename __fixed_size_storage_builder< | |
1002 | _Tp, _Remain, _SimdTuple<_Tp, _As..., typename _Next::abi_type>>::type; | |
1003 | }; | |
1004 | ||
2bcceb6f MK |
1005 | // }}} |
1006 | // __autocvt_to_simd {{{ | |
1007 | template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>> | |
1008 | struct __autocvt_to_simd | |
1009 | { | |
1010 | _Tp _M_data; | |
1011 | using _TT = __remove_cvref_t<_Tp>; | |
1012 | ||
da579188 | 1013 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f MK |
1014 | operator _TT() |
1015 | { return _M_data; } | |
1016 | ||
da579188 | 1017 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f MK |
1018 | operator _TT&() |
1019 | { | |
1020 | static_assert(is_lvalue_reference<_Tp>::value, ""); | |
1021 | static_assert(!is_const<_Tp>::value, ""); | |
1022 | return _M_data; | |
1023 | } | |
1024 | ||
da579188 | 1025 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f MK |
1026 | operator _TT*() |
1027 | { | |
1028 | static_assert(is_lvalue_reference<_Tp>::value, ""); | |
1029 | static_assert(!is_const<_Tp>::value, ""); | |
1030 | return &_M_data; | |
1031 | } | |
1032 | ||
b31186e5 MK |
1033 | _GLIBCXX_SIMD_INTRINSIC constexpr |
1034 | __autocvt_to_simd(_Tp dd) : _M_data(dd) {} | |
2bcceb6f MK |
1035 | |
1036 | template <typename _Abi> | |
da579188 | 1037 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f MK |
1038 | operator simd<typename _TT::value_type, _Abi>() |
1039 | { return {__private_init, _M_data}; } | |
1040 | ||
1041 | template <typename _Abi> | |
da579188 | 1042 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f | 1043 | operator simd<typename _TT::value_type, _Abi>&() |
b31186e5 | 1044 | { return *reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); } |
2bcceb6f MK |
1045 | |
1046 | template <typename _Abi> | |
da579188 | 1047 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f | 1048 | operator simd<typename _TT::value_type, _Abi>*() |
b31186e5 | 1049 | { return reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); } |
2bcceb6f MK |
1050 | }; |
1051 | ||
1052 | template <typename _Tp> | |
1053 | __autocvt_to_simd(_Tp &&) -> __autocvt_to_simd<_Tp>; | |
1054 | ||
1055 | template <typename _Tp> | |
1056 | struct __autocvt_to_simd<_Tp, true> | |
1057 | { | |
1058 | using _TT = __remove_cvref_t<_Tp>; | |
1059 | _Tp _M_data; | |
1060 | fixed_size_simd<_TT, 1> _M_fd; | |
1061 | ||
52d28210 MK |
1062 | _GLIBCXX_SIMD_INTRINSIC |
1063 | constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {} | |
2bcceb6f | 1064 | |
52d28210 | 1065 | _GLIBCXX_SIMD_INTRINSIC |
2bcceb6f MK |
1066 | ~__autocvt_to_simd() |
1067 | { _M_data = __data(_M_fd).first; } | |
1068 | ||
da579188 | 1069 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f MK |
1070 | operator fixed_size_simd<_TT, 1>() |
1071 | { return _M_fd; } | |
1072 | ||
da579188 | 1073 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f MK |
1074 | operator fixed_size_simd<_TT, 1> &() |
1075 | { | |
1076 | static_assert(is_lvalue_reference<_Tp>::value, ""); | |
1077 | static_assert(!is_const<_Tp>::value, ""); | |
1078 | return _M_fd; | |
1079 | } | |
1080 | ||
da579188 | 1081 | _GLIBCXX_SIMD_INTRINSIC constexpr |
2bcceb6f MK |
1082 | operator fixed_size_simd<_TT, 1> *() |
1083 | { | |
1084 | static_assert(is_lvalue_reference<_Tp>::value, ""); | |
1085 | static_assert(!is_const<_Tp>::value, ""); | |
1086 | return &_M_fd; | |
1087 | } | |
1088 | }; | |
1089 | ||
1090 | // }}} | |
1091 | ||
1092 | struct _CommonImplFixedSize; | |
52d28210 MK |
1093 | template <int _Np, typename = __detail::__odr_helper> struct _SimdImplFixedSize; |
1094 | template <int _Np, typename = __detail::__odr_helper> struct _MaskImplFixedSize; | |
2bcceb6f MK |
1095 | // simd_abi::_Fixed {{{ |
1096 | template <int _Np> | |
1097 | struct simd_abi::_Fixed | |
1098 | { | |
1099 | template <typename _Tp> static constexpr size_t _S_size = _Np; | |
1100 | template <typename _Tp> static constexpr size_t _S_full_size = _Np; | |
1101 | // validity traits {{{ | |
1102 | struct _IsValidAbiTag : public __bool_constant<(_Np > 0)> {}; | |
1103 | ||
1104 | template <typename _Tp> | |
1105 | struct _IsValidSizeFor | |
1106 | : __bool_constant<(_Np <= simd_abi::max_fixed_size<_Tp>)> {}; | |
1107 | ||
1108 | template <typename _Tp> | |
1109 | struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>, | |
1110 | _IsValidSizeFor<_Tp>> {}; | |
1111 | ||
1112 | template <typename _Tp> | |
1113 | static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value; | |
1114 | ||
1115 | // }}} | |
1116 | // _S_masked {{{ | |
1117 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> | |
1118 | _S_masked(_BitMask<_Np> __x) | |
1119 | { return __x._M_sanitized(); } | |
1120 | ||
1121 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> | |
1122 | _S_masked(_SanitizedBitMask<_Np> __x) | |
1123 | { return __x; } | |
1124 | ||
1125 | // }}} | |
1126 | // _*Impl {{{ | |
1127 | using _CommonImpl = _CommonImplFixedSize; | |
1128 | using _SimdImpl = _SimdImplFixedSize<_Np>; | |
1129 | using _MaskImpl = _MaskImplFixedSize<_Np>; | |
1130 | ||
1131 | // }}} | |
1132 | // __traits {{{ | |
1133 | template <typename _Tp, bool = _S_is_valid_v<_Tp>> | |
1134 | struct __traits : _InvalidTraits {}; | |
1135 | ||
1136 | template <typename _Tp> | |
1137 | struct __traits<_Tp, true> | |
1138 | { | |
1139 | using _IsValid = true_type; | |
1140 | using _SimdImpl = _SimdImplFixedSize<_Np>; | |
1141 | using _MaskImpl = _MaskImplFixedSize<_Np>; | |
1142 | ||
1143 | // simd and simd_mask member types {{{ | |
1144 | using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; | |
1145 | using _MaskMember = _SanitizedBitMask<_Np>; | |
1146 | ||
1147 | static constexpr size_t _S_simd_align | |
1148 | = std::__bit_ceil(_Np * sizeof(_Tp)); | |
1149 | ||
1150 | static constexpr size_t _S_mask_align = alignof(_MaskMember); | |
1151 | ||
1152 | // }}} | |
1153 | // _SimdBase / base class for simd, providing extra conversions {{{ | |
1154 | struct _SimdBase | |
1155 | { | |
1156 | // The following ensures, function arguments are passed via the stack. | |
1157 | // This is important for ABI compatibility across TU boundaries | |
da579188 | 1158 | _GLIBCXX_SIMD_ALWAYS_INLINE constexpr |
2bcceb6f | 1159 | _SimdBase(const _SimdBase&) {} |
da579188 | 1160 | |
2bcceb6f MK |
1161 | _SimdBase() = default; |
1162 | ||
da579188 | 1163 | _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit |
b31186e5 | 1164 | operator const _SimdMember &() const |
2bcceb6f MK |
1165 | { return static_cast<const simd<_Tp, _Fixed>*>(this)->_M_data; } |
1166 | ||
da579188 | 1167 | _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit |
b31186e5 | 1168 | operator array<_Tp, _Np>() const |
2bcceb6f MK |
1169 | { |
1170 | array<_Tp, _Np> __r; | |
1171 | // _SimdMember can be larger because of higher alignment | |
1172 | static_assert(sizeof(__r) <= sizeof(_SimdMember), ""); | |
1173 | __builtin_memcpy(__r.data(), &static_cast<const _SimdMember&>(*this), | |
1174 | sizeof(__r)); | |
1175 | return __r; | |
1176 | } | |
1177 | }; | |
1178 | ||
1179 | // }}} | |
1180 | // _MaskBase {{{ | |
1181 | // empty. The bitset interface suffices | |
1182 | struct _MaskBase {}; | |
1183 | ||
1184 | // }}} | |
1185 | // _SimdCastType {{{ | |
1186 | struct _SimdCastType | |
1187 | { | |
da579188 | 1188 | _GLIBCXX_SIMD_ALWAYS_INLINE constexpr |
2bcceb6f | 1189 | _SimdCastType(const array<_Tp, _Np>&); |
b31186e5 | 1190 | |
da579188 | 1191 | _GLIBCXX_SIMD_ALWAYS_INLINE constexpr |
2bcceb6f | 1192 | _SimdCastType(const _SimdMember& dd) : _M_data(dd) {} |
b31186e5 | 1193 | |
da579188 | 1194 | _GLIBCXX_SIMD_ALWAYS_INLINE constexpr explicit |
b31186e5 | 1195 | operator const _SimdMember &() const { return _M_data; } |
2bcceb6f MK |
1196 | |
1197 | private: | |
1198 | const _SimdMember& _M_data; | |
1199 | }; | |
1200 | ||
1201 | // }}} | |
1202 | // _MaskCastType {{{ | |
1203 | class _MaskCastType | |
1204 | { | |
1205 | _MaskCastType() = delete; | |
1206 | }; | |
1207 | // }}} | |
1208 | }; | |
1209 | // }}} | |
1210 | }; | |
1211 | ||
1212 | // }}} | |
1213 | // _CommonImplFixedSize {{{ | |
1214 | struct _CommonImplFixedSize | |
1215 | { | |
1216 | // _S_store {{{ | |
1217 | template <typename _Tp, typename... _As> | |
1218 | _GLIBCXX_SIMD_INTRINSIC static void | |
1219 | _S_store(const _SimdTuple<_Tp, _As...>& __x, void* __addr) | |
1220 | { | |
1221 | constexpr size_t _Np = _SimdTuple<_Tp, _As...>::_S_size(); | |
1222 | __builtin_memcpy(__addr, &__x, _Np * sizeof(_Tp)); | |
1223 | } | |
1224 | ||
1225 | // }}} | |
1226 | }; | |
1227 | ||
1228 | // }}} | |
1229 | // _SimdImplFixedSize {{{1 | |
1230 | // fixed_size should not inherit from _SimdMathFallback in order for | |
1231 | // specializations in the used _SimdTuple Abis to get used | |
52d28210 | 1232 | template <int _Np, typename> |
2bcceb6f MK |
1233 | struct _SimdImplFixedSize |
1234 | { | |
1235 | // member types {{{2 | |
1236 | using _MaskMember = _SanitizedBitMask<_Np>; | |
1237 | ||
1238 | template <typename _Tp> | |
1239 | using _SimdMember = __fixed_size_storage_t<_Tp, _Np>; | |
1240 | ||
1241 | template <typename _Tp> | |
1242 | static constexpr size_t _S_tuple_size = _SimdMember<_Tp>::_S_tuple_size; | |
1243 | ||
1244 | template <typename _Tp> | |
1245 | using _Simd = simd<_Tp, simd_abi::fixed_size<_Np>>; | |
1246 | ||
1247 | template <typename _Tp> | |
1248 | using _TypeTag = _Tp*; | |
1249 | ||
1250 | // broadcast {{{2 | |
1251 | template <typename _Tp> | |
e37b0432 MK |
1252 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> |
1253 | _S_broadcast(_Tp __x) noexcept | |
2bcceb6f | 1254 | { |
53b55701 MK |
1255 | return _SimdMember<_Tp>::_S_generate( |
1256 | [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
1257 | return __meta._S_broadcast(__x); | |
1258 | }); | |
2bcceb6f MK |
1259 | } |
1260 | ||
1261 | // _S_generator {{{2 | |
1262 | template <typename _Fp, typename _Tp> | |
e37b0432 MK |
1263 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> |
1264 | _S_generator(_Fp&& __gen, _TypeTag<_Tp>) | |
2bcceb6f | 1265 | { |
53b55701 MK |
1266 | return _SimdMember<_Tp>::_S_generate( |
1267 | [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
1268 | return __meta._S_generator( | |
1269 | [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
1270 | return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>()) | |
1271 | : 0; | |
1272 | }, | |
1273 | _TypeTag<_Tp>()); | |
1274 | }); | |
2bcceb6f MK |
1275 | } |
1276 | ||
1277 | // _S_load {{{2 | |
1278 | template <typename _Tp, typename _Up> | |
da579188 | 1279 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp> |
e37b0432 | 1280 | _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept |
2bcceb6f | 1281 | { |
53b55701 MK |
1282 | return _SimdMember<_Tp>::_S_generate( |
1283 | [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
1284 | return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>()); | |
1285 | }); | |
2bcceb6f MK |
1286 | } |
1287 | ||
1288 | // _S_masked_load {{{2 | |
1289 | template <typename _Tp, typename... _As, typename _Up> | |
e37b0432 | 1290 | _GLIBCXX_SIMD_INTRINSIC static _SimdTuple<_Tp, _As...> |
2bcceb6f MK |
1291 | _S_masked_load(const _SimdTuple<_Tp, _As...>& __old, |
1292 | const _MaskMember __bits, const _Up* __mem) noexcept | |
1293 | { | |
1294 | auto __merge = __old; | |
53b55701 | 1295 | __for_each(__merge, [&](auto __meta, auto& __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1296 | if (__meta._S_submask(__bits).any()) |
1297 | #pragma GCC diagnostic push | |
da579188 MK |
1298 | // Dereferencing __mem + __meta._S_offset could be UB ([expr.add]/4.3). |
1299 | // It is the responsibility of the caller of the masked load (via the mask's value) to | |
1300 | // avoid UB. Consequently, the compiler may assume this branch is unreachable, if the | |
1301 | // pointer arithmetic is UB. | |
2bcceb6f MK |
1302 | #pragma GCC diagnostic ignored "-Warray-bounds" |
1303 | __native | |
1304 | = __meta._S_masked_load(__native, __meta._S_make_mask(__bits), | |
1305 | __mem + __meta._S_offset); | |
1306 | #pragma GCC diagnostic pop | |
1307 | }); | |
1308 | return __merge; | |
1309 | } | |
1310 | ||
1311 | // _S_store {{{2 | |
1312 | template <typename _Tp, typename _Up> | |
da579188 | 1313 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
e37b0432 | 1314 | _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept |
2bcceb6f | 1315 | { |
53b55701 | 1316 | __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1317 | __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>()); |
1318 | }); | |
1319 | } | |
1320 | ||
1321 | // _S_masked_store {{{2 | |
1322 | template <typename _Tp, typename... _As, typename _Up> | |
e37b0432 MK |
1323 | _GLIBCXX_SIMD_INTRINSIC static void |
1324 | _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem, | |
1325 | const _MaskMember __bits) noexcept | |
2bcceb6f | 1326 | { |
53b55701 | 1327 | __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1328 | if (__meta._S_submask(__bits).any()) |
1329 | #pragma GCC diagnostic push | |
1330 | // __mem + __mem._S_offset could be UB ([expr.add]/4.3, but it punts | |
1331 | // the responsibility for avoiding UB to the caller of the masked | |
1332 | // store via the mask. Consequently, the compiler may assume this | |
1333 | // branch is unreachable, if the pointer arithmetic is UB. | |
1334 | #pragma GCC diagnostic ignored "-Warray-bounds" | |
1335 | __meta._S_masked_store(__native, __mem + __meta._S_offset, | |
1336 | __meta._S_make_mask(__bits)); | |
1337 | #pragma GCC diagnostic pop | |
1338 | }); | |
1339 | } | |
1340 | ||
1341 | // negation {{{2 | |
1342 | template <typename _Tp, typename... _As> | |
da579188 | 1343 | static constexpr inline _MaskMember |
2bcceb6f MK |
1344 | _S_negate(const _SimdTuple<_Tp, _As...>& __x) noexcept |
1345 | { | |
1346 | _MaskMember __bits = 0; | |
1347 | __for_each( | |
53b55701 | 1348 | __x, [&__bits](auto __meta, auto __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1349 | __bits |
1350 | |= __meta._S_mask_to_shifted_ullong(__meta._S_negate(__native)); | |
1351 | }); | |
1352 | return __bits; | |
1353 | } | |
1354 | ||
1355 | // reductions {{{2 | |
1356 | template <typename _Tp, typename _BinaryOperation> | |
1357 | static constexpr inline _Tp _S_reduce(const _Simd<_Tp>& __x, | |
1358 | const _BinaryOperation& __binary_op) | |
1359 | { | |
1360 | using _Tup = _SimdMember<_Tp>; | |
1361 | const _Tup& __tup = __data(__x); | |
1362 | if constexpr (_Tup::_S_tuple_size == 1) | |
1363 | return _Tup::_FirstAbi::_SimdImpl::_S_reduce( | |
1364 | __tup.template _M_simd_at<0>(), __binary_op); | |
1365 | else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 2 | |
1366 | && _Tup::_SecondType::_S_size() == 1) | |
1367 | { | |
1368 | return __binary_op(simd<_Tp, simd_abi::scalar>( | |
1369 | reduce(__tup.template _M_simd_at<0>(), | |
1370 | __binary_op)), | |
1371 | __tup.template _M_simd_at<1>())[0]; | |
1372 | } | |
1373 | else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 4 | |
1374 | && _Tup::_SecondType::_S_size() == 2) | |
1375 | { | |
1376 | return __binary_op( | |
1377 | simd<_Tp, simd_abi::scalar>( | |
1378 | reduce(__tup.template _M_simd_at<0>(), __binary_op)), | |
1379 | simd<_Tp, simd_abi::scalar>( | |
1380 | reduce(__tup.template _M_simd_at<1>(), __binary_op)))[0]; | |
1381 | } | |
1382 | else | |
1383 | { | |
1384 | const auto& __x2 = __call_with_n_evaluations< | |
1385 | __div_roundup(_Tup::_S_tuple_size, 2)>( | |
53b55701 | 1386 | [](auto __first_simd, auto... __remaining) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1387 | if constexpr (sizeof...(__remaining) == 0) |
1388 | return __first_simd; | |
1389 | else | |
1390 | { | |
1391 | using _Tup2 | |
1392 | = _SimdTuple<_Tp, | |
1393 | typename decltype(__first_simd)::abi_type, | |
1394 | typename decltype(__remaining)::abi_type...>; | |
1395 | return fixed_size_simd<_Tp, _Tup2::_S_size()>( | |
1396 | __private_init, | |
1397 | __make_simd_tuple(__first_simd, __remaining...)); | |
1398 | } | |
1399 | }, | |
53b55701 | 1400 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1401 | auto __left = __tup.template _M_simd_at<2 * __i>(); |
1402 | if constexpr (2 * __i + 1 == _Tup::_S_tuple_size) | |
1403 | return __left; | |
1404 | else | |
1405 | { | |
1406 | auto __right = __tup.template _M_simd_at<2 * __i + 1>(); | |
1407 | using _LT = decltype(__left); | |
1408 | using _RT = decltype(__right); | |
1409 | if constexpr (_LT::size() == _RT::size()) | |
1410 | return __binary_op(__left, __right); | |
1411 | else | |
1412 | { | |
1413 | _GLIBCXX_SIMD_USE_CONSTEXPR_API | |
1414 | typename _LT::mask_type __k( | |
1415 | __private_init, | |
53b55701 MK |
1416 | [](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1417 | return __j < _RT::size(); | |
1418 | }); | |
2bcceb6f MK |
1419 | _LT __ext_right = __left; |
1420 | where(__k, __ext_right) | |
1421 | = __proposed::resizing_simd_cast<_LT>(__right); | |
1422 | where(__k, __left) = __binary_op(__left, __ext_right); | |
1423 | return __left; | |
1424 | } | |
1425 | } | |
1426 | }); | |
1427 | return reduce(__x2, __binary_op); | |
1428 | } | |
1429 | } | |
1430 | ||
1431 | // _S_min, _S_max {{{2 | |
1432 | template <typename _Tp, typename... _As> | |
e37b0432 | 1433 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
b31186e5 | 1434 | _S_min(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) |
2bcceb6f MK |
1435 | { |
1436 | return __a._M_apply_per_chunk( | |
53b55701 | 1437 | [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1438 | return __impl._S_min(__aa, __bb); |
1439 | }, | |
1440 | __b); | |
1441 | } | |
1442 | ||
1443 | template <typename _Tp, typename... _As> | |
e37b0432 | 1444 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
b31186e5 | 1445 | _S_max(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b) |
2bcceb6f MK |
1446 | { |
1447 | return __a._M_apply_per_chunk( | |
53b55701 | 1448 | [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1449 | return __impl._S_max(__aa, __bb); |
1450 | }, | |
1451 | __b); | |
1452 | } | |
1453 | ||
1454 | // _S_complement {{{2 | |
1455 | template <typename _Tp, typename... _As> | |
e37b0432 | 1456 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
2bcceb6f MK |
1457 | _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept |
1458 | { | |
53b55701 MK |
1459 | return __x._M_apply_per_chunk( |
1460 | [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
1461 | return __impl._S_complement(__xx); | |
1462 | }); | |
2bcceb6f MK |
1463 | } |
1464 | ||
1465 | // _S_unary_minus {{{2 | |
1466 | template <typename _Tp, typename... _As> | |
e37b0432 | 1467 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
2bcceb6f MK |
1468 | _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept |
1469 | { | |
53b55701 MK |
1470 | return __x._M_apply_per_chunk( |
1471 | [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
1472 | return __impl._S_unary_minus(__xx); | |
1473 | }); | |
2bcceb6f MK |
1474 | } |
1475 | ||
1476 | // arithmetic operators {{{2 | |
1477 | ||
53b55701 MK |
1478 | #define _GLIBCXX_SIMD_FIXED_OP(name_, op_) \ |
1479 | template <typename _Tp, typename... _As> \ | |
e37b0432 | 1480 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> name_( \ |
53b55701 MK |
1481 | const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y) \ |
1482 | { \ | |
1483 | return __x._M_apply_per_chunk( \ | |
1484 | [](auto __impl, auto __xx, auto __yy) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ | |
1485 | return __impl.name_(__xx, __yy); \ | |
1486 | }, \ | |
1487 | __y); \ | |
2bcceb6f MK |
1488 | } |
1489 | ||
1490 | _GLIBCXX_SIMD_FIXED_OP(_S_plus, +) | |
1491 | _GLIBCXX_SIMD_FIXED_OP(_S_minus, -) | |
1492 | _GLIBCXX_SIMD_FIXED_OP(_S_multiplies, *) | |
1493 | _GLIBCXX_SIMD_FIXED_OP(_S_divides, /) | |
1494 | _GLIBCXX_SIMD_FIXED_OP(_S_modulus, %) | |
1495 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_and, &) | |
1496 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_or, |) | |
1497 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_xor, ^) | |
1498 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_left, <<) | |
1499 | _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_right, >>) | |
1500 | #undef _GLIBCXX_SIMD_FIXED_OP | |
1501 | ||
1502 | template <typename _Tp, typename... _As> | |
e37b0432 | 1503 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
2bcceb6f MK |
1504 | _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y) |
1505 | { | |
53b55701 MK |
1506 | return __x._M_apply_per_chunk( |
1507 | [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
1508 | return __impl._S_bit_shift_left(__xx, __y); | |
1509 | }); | |
2bcceb6f MK |
1510 | } |
1511 | ||
1512 | template <typename _Tp, typename... _As> | |
e37b0432 | 1513 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> |
2bcceb6f MK |
1514 | _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y) |
1515 | { | |
53b55701 MK |
1516 | return __x._M_apply_per_chunk( |
1517 | [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
1518 | return __impl._S_bit_shift_right(__xx, __y); | |
1519 | }); | |
2bcceb6f MK |
1520 | } |
1521 | ||
1522 | // math {{{2 | |
1523 | #define _GLIBCXX_SIMD_APPLY_ON_TUPLE(_RetTp, __name) \ | |
1524 | template <typename _Tp, typename... _As, typename... _More> \ | |
1525 | static inline __fixed_size_storage_t<_RetTp, _Np> \ | |
1526 | _S_##__name(const _SimdTuple<_Tp, _As...>& __x, \ | |
1527 | const _More&... __more) \ | |
1528 | { \ | |
1529 | if constexpr (sizeof...(_More) == 0) \ | |
1530 | { \ | |
1531 | if constexpr (is_same_v<_Tp, _RetTp>) \ | |
1532 | return __x._M_apply_per_chunk( \ | |
53b55701 MK |
1533 | [](auto __impl, auto __xx) \ |
1534 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ | |
1535 | { \ | |
1536 | using _V = typename decltype(__impl)::simd_type; \ | |
1537 | return __data(__name(_V(__private_init, __xx))); \ | |
1538 | }); \ | |
2bcceb6f MK |
1539 | else \ |
1540 | return __optimize_simd_tuple( \ | |
53b55701 MK |
1541 | __x.template _M_apply_r<_RetTp>( \ |
1542 | [](auto __impl, auto __xx) \ | |
1543 | _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ | |
1544 | { return __impl._S_##__name(__xx); })); \ | |
2bcceb6f MK |
1545 | } \ |
1546 | else if constexpr ( \ | |
1547 | is_same_v< \ | |
1548 | _Tp, \ | |
1549 | _RetTp> && (... && is_same_v<_SimdTuple<_Tp, _As...>, _More>) ) \ | |
1550 | return __x._M_apply_per_chunk( \ | |
53b55701 MK |
1551 | [](auto __impl, auto __xx, auto... __pack) \ |
1552 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ | |
1553 | { \ | |
1554 | using _V = typename decltype(__impl)::simd_type; \ | |
1555 | return __data(__name(_V(__private_init, __xx), \ | |
1556 | _V(__private_init, __pack)...)); \ | |
1557 | }, __more...); \ | |
2bcceb6f MK |
1558 | else if constexpr (is_same_v<_Tp, _RetTp>) \ |
1559 | return __x._M_apply_per_chunk( \ | |
53b55701 MK |
1560 | [](auto __impl, auto __xx, auto... __pack) \ |
1561 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ | |
1562 | { \ | |
1563 | using _V = typename decltype(__impl)::simd_type; \ | |
1564 | return __data(__name(_V(__private_init, __xx), \ | |
1565 | __autocvt_to_simd(__pack)...)); \ | |
1566 | }, __more...); \ | |
2bcceb6f MK |
1567 | else \ |
1568 | __assert_unreachable<_Tp>(); \ | |
1569 | } | |
1570 | ||
1571 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acos) | |
1572 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asin) | |
1573 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan) | |
1574 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan2) | |
1575 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cos) | |
1576 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sin) | |
1577 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tan) | |
1578 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acosh) | |
1579 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asinh) | |
1580 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atanh) | |
1581 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cosh) | |
1582 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sinh) | |
1583 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tanh) | |
1584 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp) | |
1585 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp2) | |
1586 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, expm1) | |
1587 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, ilogb) | |
1588 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log) | |
1589 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log10) | |
1590 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log1p) | |
1591 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log2) | |
1592 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, logb) | |
1593 | // modf implemented in simd_math.h | |
1594 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, | |
1595 | scalbn) // double scalbn(double x, int exp); | |
1596 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbln) | |
1597 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cbrt) | |
1598 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, abs) | |
1599 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fabs) | |
1600 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, pow) | |
1601 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sqrt) | |
1602 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erf) | |
1603 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erfc) | |
1604 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, lgamma) | |
1605 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tgamma) | |
1606 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, trunc) | |
1607 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ceil) | |
1608 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, floor) | |
1609 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nearbyint) | |
1610 | ||
1611 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, rint) | |
1612 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lrint) | |
1613 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llrint) | |
1614 | ||
1615 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, round) | |
1616 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lround) | |
1617 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llround) | |
1618 | ||
1619 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp) | |
1620 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod) | |
1621 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder) | |
0237aa8c | 1622 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, copysign) |
2bcceb6f MK |
1623 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter) |
1624 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim) | |
1625 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax) | |
1626 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmin) | |
1627 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fma) | |
1628 | _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, fpclassify) | |
1629 | #undef _GLIBCXX_SIMD_APPLY_ON_TUPLE | |
1630 | ||
1631 | template <typename _Tp, typename... _Abis> | |
e37b0432 MK |
1632 | static inline _SimdTuple<_Tp, _Abis...> |
1633 | _S_remquo(const _SimdTuple<_Tp, _Abis...>& __x, const _SimdTuple<_Tp, _Abis...>& __y, | |
1634 | __fixed_size_storage_t<int, _SimdTuple<_Tp, _Abis...>::_S_size()>* __z) | |
2bcceb6f MK |
1635 | { |
1636 | return __x._M_apply_per_chunk( | |
53b55701 MK |
1637 | [](auto __impl, const auto __xx, const auto __yy, auto& __zz) |
1638 | _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA | |
1639 | { return __impl._S_remquo(__xx, __yy, &__zz); }, | |
1640 | __y, *__z); | |
2bcceb6f MK |
1641 | } |
1642 | ||
1643 | template <typename _Tp, typename... _As> | |
1644 | static inline _SimdTuple<_Tp, _As...> | |
1645 | _S_frexp(const _SimdTuple<_Tp, _As...>& __x, | |
1646 | __fixed_size_storage_t<int, _Np>& __exp) noexcept | |
1647 | { | |
1648 | return __x._M_apply_per_chunk( | |
53b55701 MK |
1649 | [](auto __impl, const auto& __a, auto& __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
1650 | return __data(frexp(typename decltype(__impl)::simd_type(__private_init, __a), | |
1651 | __autocvt_to_simd(__b))); | |
1652 | }, __exp); | |
2bcceb6f MK |
1653 | } |
1654 | ||
e37b0432 MK |
1655 | #define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_) \ |
1656 | template <typename _Tp, typename... _As> \ | |
1657 | static inline _MaskMember \ | |
b31186e5 | 1658 | _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept \ |
e37b0432 MK |
1659 | { \ |
1660 | return _M_test([] (auto __impl, auto __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \ | |
1661 | return __impl._S_##name_(__xx); \ | |
1662 | }, __x); \ | |
2bcceb6f MK |
1663 | } |
1664 | ||
1665 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(isinf) | |
1666 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(isfinite) | |
1667 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnan) | |
1668 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnormal) | |
1669 | _GLIBCXX_SIMD_TEST_ON_TUPLE_(signbit) | |
1670 | #undef _GLIBCXX_SIMD_TEST_ON_TUPLE_ | |
1671 | ||
1672 | // _S_increment & _S_decrement{{{2 | |
1673 | template <typename... _Ts> | |
1674 | _GLIBCXX_SIMD_INTRINSIC static constexpr void | |
1675 | _S_increment(_SimdTuple<_Ts...>& __x) | |
1676 | { | |
1677 | __for_each( | |
53b55701 | 1678 | __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1679 | __meta._S_increment(native); |
1680 | }); | |
1681 | } | |
1682 | ||
1683 | template <typename... _Ts> | |
1684 | _GLIBCXX_SIMD_INTRINSIC static constexpr void | |
1685 | _S_decrement(_SimdTuple<_Ts...>& __x) | |
1686 | { | |
1687 | __for_each( | |
53b55701 | 1688 | __x, [](auto __meta, auto& native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1689 | __meta._S_decrement(native); |
1690 | }); | |
1691 | } | |
1692 | ||
1693 | // compares {{{2 | |
1694 | #define _GLIBCXX_SIMD_CMP_OPERATIONS(__cmp) \ | |
1695 | template <typename _Tp, typename... _As> \ | |
da579188 | 1696 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember \ |
2bcceb6f MK |
1697 | __cmp(const _SimdTuple<_Tp, _As...>& __x, \ |
1698 | const _SimdTuple<_Tp, _As...>& __y) \ | |
1699 | { \ | |
53b55701 MK |
1700 | return _M_test([](auto __impl, auto __xx, auto __yy) \ |
1701 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \ | |
1702 | { return __impl.__cmp(__xx, __yy); }, \ | |
1703 | __x, __y); \ | |
2bcceb6f MK |
1704 | } |
1705 | ||
1706 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_equal_to) | |
1707 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_not_equal_to) | |
1708 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less) | |
1709 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less_equal) | |
1710 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isless) | |
1711 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessequal) | |
1712 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreater) | |
1713 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreaterequal) | |
1714 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessgreater) | |
1715 | _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isunordered) | |
1716 | #undef _GLIBCXX_SIMD_CMP_OPERATIONS | |
1717 | ||
1718 | // smart_reference access {{{2 | |
1719 | template <typename _Tp, typename... _As, typename _Up> | |
da579188 | 1720 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
b31186e5 | 1721 | _S_set(_SimdTuple<_Tp, _As...>& __v, int __i, _Up&& __x) noexcept |
2bcceb6f MK |
1722 | { __v._M_set(__i, static_cast<_Up&&>(__x)); } |
1723 | ||
1724 | // _S_masked_assign {{{2 | |
1725 | template <typename _Tp, typename... _As> | |
da579188 | 1726 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
2bcceb6f MK |
1727 | _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, |
1728 | const __type_identity_t<_SimdTuple<_Tp, _As...>>& __rhs) | |
1729 | { | |
53b55701 MK |
1730 | __for_each(__lhs, __rhs, |
1731 | [&](auto __meta, auto& __native_lhs, auto __native_rhs) | |
1732 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA | |
1733 | { | |
1734 | __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs, | |
1735 | __native_rhs); | |
1736 | }); | |
2bcceb6f MK |
1737 | } |
1738 | ||
1739 | // Optimization for the case where the RHS is a scalar. No need to broadcast | |
1740 | // the scalar to a simd first. | |
1741 | template <typename _Tp, typename... _As> | |
da579188 | 1742 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
2bcceb6f MK |
1743 | _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, |
1744 | const __type_identity_t<_Tp> __rhs) | |
1745 | { | |
1746 | __for_each( | |
53b55701 | 1747 | __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1748 | __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs, |
1749 | __rhs); | |
1750 | }); | |
1751 | } | |
1752 | ||
1753 | // _S_masked_cassign {{{2 | |
1754 | template <typename _Op, typename _Tp, typename... _As> | |
da579188 | 1755 | static constexpr inline void |
b31186e5 MK |
1756 | _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, |
1757 | const _SimdTuple<_Tp, _As...>& __rhs, _Op __op) | |
2bcceb6f | 1758 | { |
53b55701 MK |
1759 | __for_each(__lhs, __rhs, |
1760 | [&](auto __meta, auto& __native_lhs, auto __native_rhs) | |
1761 | constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA | |
1762 | { | |
1763 | __meta.template _S_masked_cassign(__meta._S_make_mask(__bits), | |
1764 | __native_lhs, __native_rhs, __op); | |
1765 | }); | |
2bcceb6f MK |
1766 | } |
1767 | ||
1768 | // Optimization for the case where the RHS is a scalar. No need to broadcast | |
1769 | // the scalar to a simd first. | |
1770 | template <typename _Op, typename _Tp, typename... _As> | |
da579188 | 1771 | static constexpr inline void |
b31186e5 MK |
1772 | _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs, |
1773 | const _Tp& __rhs, _Op __op) | |
2bcceb6f MK |
1774 | { |
1775 | __for_each( | |
53b55701 | 1776 | __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1777 | __meta.template _S_masked_cassign(__meta._S_make_mask(__bits), |
1778 | __native_lhs, __rhs, __op); | |
1779 | }); | |
1780 | } | |
1781 | ||
1782 | // _S_masked_unary {{{2 | |
1783 | template <template <typename> class _Op, typename _Tp, typename... _As> | |
da579188 | 1784 | static constexpr inline _SimdTuple<_Tp, _As...> |
343f01f4 | 1785 | _S_masked_unary(const _MaskMember __bits, const _SimdTuple<_Tp, _As...>& __v) |
2bcceb6f MK |
1786 | { |
1787 | return __v._M_apply_wrapped([&__bits](auto __meta, | |
1788 | auto __native) constexpr { | |
1789 | return __meta.template _S_masked_unary<_Op>(__meta._S_make_mask( | |
1790 | __bits), | |
1791 | __native); | |
1792 | }); | |
1793 | } | |
1794 | ||
1795 | // }}}2 | |
1796 | }; | |
1797 | ||
1798 | // _MaskImplFixedSize {{{1 | |
52d28210 | 1799 | template <int _Np, typename> |
2bcceb6f MK |
1800 | struct _MaskImplFixedSize |
1801 | { | |
1802 | static_assert( | |
1803 | sizeof(_ULLong) * __CHAR_BIT__ >= _Np, | |
1804 | "The fixed_size implementation relies on one _ULLong being able to store " | |
1805 | "all boolean elements."); // required in load & store | |
1806 | ||
1807 | // member types {{{ | |
1808 | using _Abi = simd_abi::fixed_size<_Np>; | |
1809 | ||
1810 | using _MaskMember = _SanitizedBitMask<_Np>; | |
1811 | ||
1812 | template <typename _Tp> | |
1813 | using _FirstAbi = typename __fixed_size_storage_t<_Tp, _Np>::_FirstAbi; | |
1814 | ||
1815 | template <typename _Tp> | |
1816 | using _TypeTag = _Tp*; | |
1817 | ||
1818 | // }}} | |
1819 | // _S_broadcast {{{ | |
1820 | template <typename> | |
1821 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember | |
1822 | _S_broadcast(bool __x) | |
1823 | { return __x ? ~_MaskMember() : _MaskMember(); } | |
1824 | ||
1825 | // }}} | |
1826 | // _S_load {{{ | |
1827 | template <typename> | |
1828 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember | |
1829 | _S_load(const bool* __mem) | |
1830 | { | |
da579188 MK |
1831 | if (__builtin_is_constant_evaluated()) |
1832 | { | |
1833 | _MaskMember __r{}; | |
1834 | for (size_t __i = 0; __i < _Np; ++__i) | |
1835 | __r.set(__i, __mem[__i]); | |
1836 | return __r; | |
1837 | } | |
2bcceb6f MK |
1838 | using _Ip = __int_for_sizeof_t<bool>; |
1839 | // the following load uses element_aligned and relies on __mem already | |
1840 | // carrying alignment information from when this load function was | |
1841 | // called. | |
1842 | const simd<_Ip, _Abi> __bools(reinterpret_cast<const __may_alias<_Ip>*>( | |
1843 | __mem), | |
1844 | element_aligned); | |
1845 | return __data(__bools != 0); | |
1846 | } | |
1847 | ||
1848 | // }}} | |
1849 | // _S_to_bits {{{ | |
1850 | template <bool _Sanitized> | |
1851 | _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np> | |
1852 | _S_to_bits(_BitMask<_Np, _Sanitized> __x) | |
1853 | { | |
1854 | if constexpr (_Sanitized) | |
1855 | return __x; | |
1856 | else | |
1857 | return __x._M_sanitized(); | |
1858 | } | |
1859 | ||
1860 | // }}} | |
1861 | // _S_convert {{{ | |
1862 | template <typename _Tp, typename _Up, typename _UAbi> | |
1863 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember | |
1864 | _S_convert(simd_mask<_Up, _UAbi> __x) | |
1865 | { | |
1866 | return _UAbi::_MaskImpl::_S_to_bits(__data(__x)) | |
1867 | .template _M_extract<0, _Np>(); | |
1868 | } | |
1869 | ||
1870 | // }}} | |
1871 | // _S_from_bitmask {{{2 | |
1872 | template <typename _Tp> | |
da579188 | 1873 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
2bcceb6f MK |
1874 | _S_from_bitmask(_MaskMember __bits, _TypeTag<_Tp>) noexcept |
1875 | { return __bits; } | |
1876 | ||
1877 | // _S_load {{{2 | |
da579188 | 1878 | static constexpr inline _MaskMember |
b31186e5 | 1879 | _S_load(const bool* __mem) noexcept |
2bcceb6f MK |
1880 | { |
1881 | // TODO: _UChar is not necessarily the best type to use here. For smaller | |
1882 | // _Np _UShort, _UInt, _ULLong, float, and double can be more efficient. | |
1883 | _ULLong __r = 0; | |
1884 | using _Vs = __fixed_size_storage_t<_UChar, _Np>; | |
53b55701 | 1885 | __for_each(_Vs{}, [&](auto __meta, auto) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { |
2bcceb6f MK |
1886 | __r |= __meta._S_mask_to_shifted_ullong( |
1887 | __meta._S_mask_impl._S_load(&__mem[__meta._S_offset], | |
1888 | _SizeConstant<__meta._S_size()>())); | |
1889 | }); | |
1890 | return __r; | |
1891 | } | |
1892 | ||
1893 | // _S_masked_load {{{2 | |
da579188 | 1894 | static constexpr inline _MaskMember |
b31186e5 | 1895 | _S_masked_load(_MaskMember __merge, _MaskMember __mask, const bool* __mem) noexcept |
2bcceb6f | 1896 | { |
53b55701 MK |
1897 | _BitOps::_S_bit_iteration(__mask.to_ullong(), |
1898 | [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { | |
1899 | __merge.set(__i, __mem[__i]); | |
1900 | }); | |
2bcceb6f MK |
1901 | return __merge; |
1902 | } | |
1903 | ||
1904 | // _S_store {{{2 | |
da579188 | 1905 | static constexpr inline void |
b31186e5 | 1906 | _S_store(const _MaskMember __bitmask, bool* __mem) noexcept |
2bcceb6f MK |
1907 | { |
1908 | if constexpr (_Np == 1) | |
1909 | __mem[0] = __bitmask[0]; | |
1910 | else | |
1911 | _FirstAbi<_UChar>::_CommonImpl::_S_store_bool_array(__bitmask, __mem); | |
1912 | } | |
1913 | ||
1914 | // _S_masked_store {{{2 | |
da579188 | 1915 | static constexpr inline void |
b31186e5 | 1916 | _S_masked_store(const _MaskMember __v, bool* __mem, const _MaskMember __k) noexcept |
2bcceb6f | 1917 | { |
53b55701 MK |
1918 | _BitOps::_S_bit_iteration( |
1919 | __k, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; }); | |
2bcceb6f MK |
1920 | } |
1921 | ||
1922 | // logical and bitwise operators {{{2 | |
da579188 | 1923 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
2bcceb6f MK |
1924 | _S_logical_and(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1925 | { return __x & __y; } | |
1926 | ||
da579188 | 1927 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
2bcceb6f MK |
1928 | _S_logical_or(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1929 | { return __x | __y; } | |
1930 | ||
1931 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember | |
1932 | _S_bit_not(const _MaskMember& __x) noexcept | |
1933 | { return ~__x; } | |
1934 | ||
da579188 | 1935 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
2bcceb6f MK |
1936 | _S_bit_and(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1937 | { return __x & __y; } | |
1938 | ||
da579188 | 1939 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
2bcceb6f MK |
1940 | _S_bit_or(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1941 | { return __x | __y; } | |
1942 | ||
da579188 | 1943 | _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember |
2bcceb6f MK |
1944 | _S_bit_xor(const _MaskMember& __x, const _MaskMember& __y) noexcept |
1945 | { return __x ^ __y; } | |
1946 | ||
1947 | // smart_reference access {{{2 | |
da579188 | 1948 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
b31186e5 | 1949 | _S_set(_MaskMember& __k, int __i, bool __x) noexcept |
2bcceb6f MK |
1950 | { __k.set(__i, __x); } |
1951 | ||
1952 | // _S_masked_assign {{{2 | |
da579188 | 1953 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
b31186e5 | 1954 | _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const _MaskMember __rhs) |
2bcceb6f MK |
1955 | { __lhs = (__lhs & ~__k) | (__rhs & __k); } |
1956 | ||
1957 | // Optimization for the case where the RHS is a scalar. | |
da579188 | 1958 | _GLIBCXX_SIMD_INTRINSIC static constexpr void |
b31186e5 | 1959 | _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const bool __rhs) |
2bcceb6f MK |
1960 | { |
1961 | if (__rhs) | |
1962 | __lhs |= __k; | |
1963 | else | |
1964 | __lhs &= ~__k; | |
1965 | } | |
1966 | ||
1967 | // }}}2 | |
1968 | // _S_all_of {{{ | |
1969 | template <typename _Tp> | |
da579188 | 1970 | _GLIBCXX_SIMD_INTRINSIC static constexpr bool |
b31186e5 | 1971 | _S_all_of(simd_mask<_Tp, _Abi> __k) |
2bcceb6f MK |
1972 | { return __data(__k).all(); } |
1973 | ||
1974 | // }}} | |
1975 | // _S_any_of {{{ | |
1976 | template <typename _Tp> | |
da579188 | 1977 | _GLIBCXX_SIMD_INTRINSIC static constexpr bool |
b31186e5 | 1978 | _S_any_of(simd_mask<_Tp, _Abi> __k) |
2bcceb6f MK |
1979 | { return __data(__k).any(); } |
1980 | ||
1981 | // }}} | |
1982 | // _S_none_of {{{ | |
1983 | template <typename _Tp> | |
da579188 | 1984 | _GLIBCXX_SIMD_INTRINSIC static constexpr bool |
b31186e5 | 1985 | _S_none_of(simd_mask<_Tp, _Abi> __k) |
2bcceb6f MK |
1986 | { return __data(__k).none(); } |
1987 | ||
1988 | // }}} | |
1989 | // _S_some_of {{{ | |
1990 | template <typename _Tp> | |
da579188 | 1991 | _GLIBCXX_SIMD_INTRINSIC static constexpr bool |
2bcceb6f MK |
1992 | _S_some_of([[maybe_unused]] simd_mask<_Tp, _Abi> __k) |
1993 | { | |
1994 | if constexpr (_Np == 1) | |
1995 | return false; | |
1996 | else | |
1997 | return __data(__k).any() && !__data(__k).all(); | |
1998 | } | |
1999 | ||
2000 | // }}} | |
2001 | // _S_popcount {{{ | |
2002 | template <typename _Tp> | |
da579188 | 2003 | _GLIBCXX_SIMD_INTRINSIC static constexpr int |
b31186e5 | 2004 | _S_popcount(simd_mask<_Tp, _Abi> __k) |
2bcceb6f MK |
2005 | { return __data(__k).count(); } |
2006 | ||
2007 | // }}} | |
2008 | // _S_find_first_set {{{ | |
2009 | template <typename _Tp> | |
da579188 | 2010 | _GLIBCXX_SIMD_INTRINSIC static constexpr int |
2bcceb6f MK |
2011 | _S_find_first_set(simd_mask<_Tp, _Abi> __k) |
2012 | { return std::__countr_zero(__data(__k).to_ullong()); } | |
2013 | ||
2014 | // }}} | |
2015 | // _S_find_last_set {{{ | |
2016 | template <typename _Tp> | |
da579188 | 2017 | _GLIBCXX_SIMD_INTRINSIC static constexpr int |
2bcceb6f MK |
2018 | _S_find_last_set(simd_mask<_Tp, _Abi> __k) |
2019 | { return std::__bit_width(__data(__k).to_ullong()) - 1; } | |
2020 | ||
2021 | // }}} | |
2022 | }; | |
2023 | // }}}1 | |
2024 | ||
2025 | _GLIBCXX_SIMD_END_NAMESPACE | |
2026 | #endif // __cplusplus >= 201703L | |
2027 | #endif // _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_ | |
2028 | ||
2029 | // vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80 |