]>
Commit | Line | Data |
---|---|---|
2bcceb6f MK |
1 | // Math overloads for simd -*- C++ -*- |
2 | ||
7adcbafe | 3 | // Copyright (C) 2020-2022 Free Software Foundation, Inc. |
2bcceb6f MK |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the | |
7 | // terms of the GNU General Public License as published by the | |
8 | // Free Software Foundation; either version 3, or (at your option) | |
9 | // any later version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, | |
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | // GNU General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_ | |
26 | #define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_ | |
27 | ||
28 | #if __cplusplus >= 201703L | |
29 | ||
30 | #include <utility> | |
31 | #include <iomanip> | |
32 | ||
33 | _GLIBCXX_SIMD_BEGIN_NAMESPACE | |
// Alias: a fixed_size_simd of element type _Tp with as many elements as the
// simd (or simd_mask) type _V.
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;
36 | ||
// _Math_return_type {{{
// Maps the return type of the scalar <cmath> function (deduced by calling it
// with double arguments) to the corresponding simd-level return type:
//   double -> simd<_Tp, _Abi>              (value-returning math function)
//   bool   -> simd_mask<_Tp, _Abi>         (classification function)
//   other  -> fixed_size_simd<_DoubleR, N> (e.g. int results such as ilogb)
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

// Convenience alias for _Math_return_type<...>::type.
template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t =
    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

// Fallback: any other scalar result type becomes a fixed_size_simd of it.
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };

//}}}
// _GLIBCXX_SIMD_MATH_CALL_ {{{
// Defines a unary simd overload `__name(simd<_Tp, _Abi>)` for floating-point
// _Tp, forwarding to the ABI-specific implementation
// _Abi::_SimdImpl::_S_<name>. The return type is derived from the scalar
// std::<name>(double) overload via _Math_return_type_t.
// (No comments inside the macro body: a // comment on a continuation line
// would swallow the backslash-newline.)
#define _GLIBCXX_SIMD_MATH_CALL_(__name)                                      \
  template <typename _Tp, typename _Abi, typename...,                        \
            typename _R = _Math_return_type_t<                                \
              decltype(std::__name(declval<double>())), _Tp, _Abi>>           \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                   \
  __name(simd<_Tp, _Abi> __x)                                                 \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
66 | ||
67 | // }}} | |
//_Extra_argument_type{{{
// Maps the scalar math function's extra parameter type _Up (as written in
// <cmath>, e.g. `_Tp`, `int`, `_Tp*`, `int*`) to the matching simd-level
// argument type. Each specialization provides:
//  - type: the simd-level argument type,
//  - declval(): the scalar type used for return-type deduction,
//  - __needs_temporary_scalar: true for output-pointer parameters,
//  - _S_data(): unwraps the simd argument for the ABI implementation.
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

// Output pointer of the value type itself (e.g. modf's _Tp* parameter).
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// Output pointer of an integral type (e.g. frexp's int* parameter).
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// By-value argument of the value type itself (e.g. atan2's second _Tp).
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

// By-value integral argument (e.g. ldexp's int exponent).
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

//}}}
// _GLIBCXX_SIMD_MATH_CALL2_ {{{
// Defines a binary simd overload `__name(simd, arg2)` where __arg2 names the
// scalar second-parameter type (mapped via _Extra_argument_type), plus — when
// __arg2 is _Tp itself — a convenience overload that converts a non-simd
// first argument to simd<_Tp, _Abi> and forwards. The SFINAE conjunction in
// the second overload keeps it out of overload resolution when the first
// argument is already the simd type or is not convertible.
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, __arg2)                             \
  template <                                                                  \
    typename _Tp, typename _Abi, typename...,                                 \
    typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>,                 \
    typename _R = _Math_return_type_t<                                        \
      decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                   \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y)         \
  {                                                                           \
    return {__private_init,                                                   \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))};  \
  }                                                                           \
  template <typename _Up, typename _Tp, typename _Abi>                        \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t<                                \
    decltype(std::__name(                                                     \
      declval<double>(),                                                      \
      declval<enable_if_t<                                                    \
        conjunction_v<                                                        \
          is_same<__arg2, _Tp>,                                               \
          negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>,          \
          is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>,      \
        double>>())),                                                         \
    _Tp, _Abi>                                                                \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy)                             \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }
147 | ||
148 | // }}} | |
// _GLIBCXX_SIMD_MATH_CALL3_ {{{
// Ternary analogue of _GLIBCXX_SIMD_MATH_CALL2_ (e.g. fma, fmadd-style
// functions): defines the simd overload plus a forwarding overload that
// converts mixed arguments to the simd type deduced from the second or third
// argument. The negation<...> clause keeps the forwarder from hijacking
// calls whose first argument is already a floating-point simd.
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, __arg2, __arg3)                     \
  template <typename _Tp, typename _Abi, typename...,                        \
            typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>,         \
            typename _Arg3 = _Extra_argument_type<__arg3, _Tp, _Abi>,         \
            typename _R = _Math_return_type_t<                                \
              decltype(std::__name(declval<double>(), _Arg2::declval(),       \
                                   _Arg3::declval())),                        \
              _Tp, _Abi>>                                                     \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                   \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y,         \
         const typename _Arg3::type& __z)                                     \
  {                                                                           \
    return {__private_init,                                                   \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y),    \
                                         _Arg3::_S_data(__z))};               \
  }                                                                           \
  template <                                                                  \
    typename _T0, typename _T1, typename _T2, typename...,                    \
    typename _U0 = __remove_cvref_t<_T0>,                                     \
    typename _U1 = __remove_cvref_t<_T1>,                                     \
    typename _U2 = __remove_cvref_t<_T2>,                                     \
    typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>,                 \
    typename = enable_if_t<conjunction_v<                                     \
      is_simd<_Simd>, is_convertible<_T0&&, _Simd>,                           \
      is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>,             \
      negation<conjunction<                                                   \
        is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(),            \
                                          declval<const _Simd&>(),            \
                                          declval<const _Simd&>()))           \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz)                                  \
  {                                                                           \
    return __name(_Simd(static_cast<_T0&&>(__xx)),                            \
                  _Simd(static_cast<_T1&&>(__yy)),                            \
                  _Simd(static_cast<_T2&&>(__zz)));                           \
  }
186 | ||
187 | // }}} | |
188 | // __cosSeries {{{ | |
189 | template <typename _Abi> | |
190 | _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi> | |
191 | __cosSeries(const simd<float, _Abi>& __x) | |
192 | { | |
193 | const simd<float, _Abi> __x2 = __x * __x; | |
194 | simd<float, _Abi> __y; | |
195 | __y = 0x1.ap-16f; // 1/8! | |
196 | __y = __y * __x2 - 0x1.6c1p-10f; // -1/6! | |
197 | __y = __y * __x2 + 0x1.555556p-5f; // 1/4! | |
198 | return __y * (__x2 * __x2) - .5f * __x2 + 1.f; | |
199 | } | |
200 | ||
201 | template <typename _Abi> | |
202 | _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi> | |
203 | __cosSeries(const simd<double, _Abi>& __x) | |
204 | { | |
205 | const simd<double, _Abi> __x2 = __x * __x; | |
206 | simd<double, _Abi> __y; | |
207 | __y = 0x1.AC00000000000p-45; // 1/16! | |
208 | __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14! | |
209 | __y = __y * __x2 + 0x1.1EED8C0000000p-29; // 1/12! | |
210 | __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10! | |
211 | __y = __y * __x2 + 0x1.A01A01A018000p-16; // 1/8! | |
212 | __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6! | |
213 | __y = __y * __x2 + 0x1.5555555555554p-5; // 1/4! | |
214 | return (__y * __x2 - .5f) * __x2 + 1.f; | |
215 | } | |
216 | ||
217 | // }}} | |
218 | // __sinSeries {{{ | |
219 | template <typename _Abi> | |
220 | _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi> | |
221 | __sinSeries(const simd<float, _Abi>& __x) | |
222 | { | |
223 | const simd<float, _Abi> __x2 = __x * __x; | |
224 | simd<float, _Abi> __y; | |
225 | __y = -0x1.9CC000p-13f; // -1/7! | |
226 | __y = __y * __x2 + 0x1.111100p-7f; // 1/5! | |
227 | __y = __y * __x2 - 0x1.555556p-3f; // -1/3! | |
228 | return __y * (__x2 * __x) + __x; | |
229 | } | |
230 | ||
231 | template <typename _Abi> | |
232 | _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi> | |
233 | __sinSeries(const simd<double, _Abi>& __x) | |
234 | { | |
235 | // __x = [0, 0.7854 = pi/4] | |
236 | // __x² = [0, 0.6169 = pi²/8] | |
237 | const simd<double, _Abi> __x2 = __x * __x; | |
238 | simd<double, _Abi> __y; | |
239 | __y = -0x1.ACF0000000000p-41; // -1/15! | |
240 | __y = __y * __x2 + 0x1.6124400000000p-33; // 1/13! | |
241 | __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11! | |
242 | __y = __y * __x2 + 0x1.71DE3A5540000p-19; // 1/9! | |
243 | __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7! | |
244 | __y = __y * __x2 + 0x1.1111111111110p-7; // 1/5! | |
245 | __y = __y * __x2 - 0x1.5555555555555p-3; // -1/3! | |
246 | return __y * (__x2 * __x) + __x; | |
247 | } | |
248 | ||
249 | // }}} | |
250 | // __zero_low_bits {{{ | |
251 | template <int _Bits, typename _Tp, typename _Abi> | |
252 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> | |
253 | __zero_low_bits(simd<_Tp, _Abi> __x) | |
254 | { | |
255 | const simd<_Tp, _Abi> __bitmask | |
256 | = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits); | |
257 | return {__private_init, | |
258 | _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))}; | |
259 | } | |
260 | ||
261 | // }}} | |
262 | // __fold_input {{{ | |
263 | ||
264 | /**@internal | |
265 | * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from: | |
266 | * quadrant 0: [-¼π, ¼π] | |
267 | * quadrant 1: [ ¼π, ¾π] | |
268 | * quadrant 2: [ ¾π, 1¼π] | |
269 | * quadrant 3: [1¼π, 1¾π] | |
270 | * | |
271 | * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using | |
272 | * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as | |
273 | * ``` | |
274 | * y = trunc(x / ¼π); | |
275 | * y += fmod(y, 2); | |
276 | * ``` | |
 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision
 * allows, `2/π * x` is better (faster).
 */
// Result of argument folding for sin/cos: the reduced argument _M_x
// (in [-π/4, π/4], see __fold_input) together with the quadrant (0..3)
// the original argument came from.
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;                            // folded argument
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant; // quadrant index mod 4
  };
288 | ||
// Constants used for argument folding. In __math_float, π/2 is split into
// __pi_2_5bits0 (5 trailing zero bits) plus a remainder so that
// y * __pi_2_5bits0 is exact for small integral y.
namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
} // namespace __math_double
302 | ||
// Folds |__x| into [-π/4, π/4] and records the quadrant (see the comment
// block above). Callers (sin/cos) account for the sign of __x themselves.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x); // fold the magnitude; sign handled by the caller
#if 0
    // Alternative (disabled) branch-free reduction:
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make the
    // following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    // Fast path: every element is already in [-π/4, π/4]; nothing to fold.
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    // Medium magnitudes: fold in single precision using the split π/2
    // constant (exact product for the small integral __y here).
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
        const _V __y = nearbyint(__r._M_x * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
        __r._M_x -= __y * __pi_2_5bits0;
        __r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
        // Large magnitudes: fold in double precision for enough accuracy,
        // then cast the reduced value back to float.
        using __math_double::__2_over_pi;
        using __math_double::__pi_2;
        using _VD = rebind_simd_t<double, _V>;
        _VD __xd = static_simd_cast<_VD>(__r._M_x);
        _VD __y = nearbyint(__xd * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
        __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }
346 | ||
// Double-precision argument folding; same contract as the float overload.
// Three accuracy tiers depending on the magnitude of the multiple __y, each
// subtracting __y * π/2 as a sum of exact partial products.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    // Fast path: everything already inside [-π/4, π/4].
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
        __r._M_quadrant = 0;
        return __r;
      }
    // __y = round(x / (π/2)); the quadrant is __y mod 4.
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
        // x - y * pi/2, y uses no more than 11 mantissa bits
        __r._M_x -= __y * 0x1.921FB54443000p0;
        __r._M_x -= __y * -0x1.73DCB3B39A000p-43;
        __r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
        // x - y * pi/2, y uses no more than 29 mantissa bits
        __r._M_x -= __y * 0x1.921FB40000000p0;
        __r._M_x -= __y * 0x1.4442D00000000p-24;
        __r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
        // x - y * pi/2, y may require all mantissa bits
        // Split __y at 26 bits so each partial product below is exact;
        // NOTE(review): the max/min pairs appear to order same-magnitude
        // terms so subtraction proceeds largest-first — confirm intent.
        const _V __y_hi = __zero_low_bits<26>(__y);
        const _V __y_lo = __y - __y_hi;
        const auto __pi_2_1 = 0x1.921FB50000000p0;
        const auto __pi_2_2 = 0x1.110B460000000p-26;
        const auto __pi_2_3 = 0x1.1A62630000000p-54;
        const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
        __r._M_x = __r._M_x - __y_hi * __pi_2_1
                   - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                   - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                   - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                   - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                   - max(__y * __pi_2_4, __y_lo * __pi_2_3)
                   - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }
398 | ||
399 | // }}} | |
400 | // __extract_exponent_as_int {{{ | |
// Extracts the raw (biased) exponent field of each element as an int simd:
// masks the value with the exponent bits (the bit pattern of infinity), then
// shifts the mantissa bits away. No bias adjustment is applied here.
template <typename _Tp, typename _Abi>
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    using namespace std::experimental::__proposed;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
      simd_bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
      >> (__digits_v<_Tp> - 1)); // shift out the mantissa bits
  }
415 | ||
416 | // }}} | |
417 | // __impl_or_fallback {{{ | |
// Calls __impl_fun(args...) when that expression is well-formed (the int
// overload is preferred for the literal 0 argument); otherwise SFINAE removes
// it and the float overload invokes __fallback_fun(args...) instead.
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
                              _Args&&... __args)
    -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

// Fallback overload, selected only when the int overload SFINAEs away.
template <typename ImplFun, typename FallbackFun, typename... _Args>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
                              _Args&&... __args)
    -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

// Entry point: first two of __args are the impl and fallback callables.
template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
438 | //}}} | |
439 | ||
// trigonometric functions {{{
// Unary inverse trig overloads, generated from the macro above.
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
// atan2 takes a second argument of the same element type (mapped to a simd):
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
445 | ||
446 | /* | |
447 | * algorithm for sine and cosine: | |
448 | * | |
449 | * The result can be calculated with sine or cosine depending on the π/4 section | |
450 | * the input is in. sine ≈ __x + __x³ cosine ≈ 1 - __x² | |
451 | * | |
452 | * sine: | |
453 | * Map -__x to __x and invert the output | |
454 | * Extend precision of __x - n * π/4 by calculating | |
455 | * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4) | |
456 | * | |
457 | * Calculate Taylor series with tuned coefficients. | |
458 | * Fix sign. | |
459 | */ | |
460 | // cos{{{ | |
// Vectorized cosine: folds the argument into [-π/4, π/4] (with quadrant),
// then evaluates the cos or sin kernel per the quadrant table below and
// applies the quadrant-derived sign.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
        // Large float inputs: recompute in double precision instead
        // (threshold 393382 — presumably the folding accuracy limit; TODO
        // confirm the constant's derivation).
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
            return static_simd_cast<_V>(
              cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | cosSeries, +
        //        1 | sinSeries, -
        //        2 | cosSeries, -
        //        3 | sinSeries, +
        using namespace std::experimental::__float_bitwise_operators;
        // (1 + q) << 30 places a 1 in the sign-bit position exactly for the
        // quadrants that need a sign flip per the table above.
        const _V __sign_flip
          = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

        const auto __need_cos = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else // some_of(__need_cos): evaluate both kernels and blend
          {
            _V __r = __sinSeries(__f._M_x);
            where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
            return __r ^ __sign_flip;
          }
      }
  }
498 | ||
// Scalar-ABI overload: a single element, forward directly to std::cos.
template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  cos(simd<_Tp, simd_abi::scalar> __x)
  { return std::cos(__data(__x)); }
504 | ||
505 | //}}} | |
506 | // sin{{{ | |
// Vectorized sine: folds the argument into [-π/4, π/4] (with quadrant),
// evaluates the sin or cos kernel per the quadrant table below, and combines
// the input sign (sin is odd) with the quadrant-derived sign.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
        // Large float inputs: recompute in double precision instead
        // (threshold 527449 — presumably the folding accuracy limit; TODO
        // confirm the constant's derivation).
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
            return static_simd_cast<_V>(
              sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | sinSeries
        //        1 | cosSeries
        //        2 | sinSeries, sign flip
        //        3 | cosSeries, sign flip
        using namespace std::experimental::__float_bitwise_operators;
        // XOR of the input's sign with the quadrant-derived sign bit.
        const auto __sign_flip
          = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

        const auto __need_sin = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else // some_of(__need_sin): evaluate both kernels and blend
          {
            _V __r = __cosSeries(__f._M_x);
            where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
            return __sign_flip ^ __r;
          }
      }
  }
544 | ||
// Scalar-ABI overload: a single element, forward directly to std::sin.
template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  sin(simd<_Tp, simd_abi::scalar> __x)
  { return std::sin(__data(__x)); }
550 | ||
551 | //}}} | |
// Remaining unary trig/hyperbolic overloads, generated from the macro above;
// each forwards to _Abi::_SimdImpl::_S_<name>.
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
// }}}
// exponential functions {{{
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)
564 | ||
565 | // }}} | |
566 | // frexp {{{ | |
567 | #if _GLIBCXX_SIMD_X86INTRIN | |
// Wraps the AVX512 vgetexp instruction (per the Intel intrinsics reference it
// yields the unbiased exponent as a floating-point value). Dispatch order:
// native-width form with AVX512VL first; without VL, widen to 512 bits and
// cut the result back down.
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>(); // caller guarantees an AVX512 path exists
  }
595 | ||
// Wraps the AVX512 vgetmant instruction: _MM_MANT_NORM_p5_1 normalizes the
// mantissa into [0.5, 1) and _MM_MANT_SIGN_src keeps the source's sign —
// exactly the mantissa contract frexp needs. Same ISA-dispatch pattern as
// __getexp above.
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
                                           _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
                                              _MM_MANT_NORM_p5_1,
                                              _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>(); // caller guarantees an AVX512 path exists
  }
629 | #endif // _GLIBCXX_SIMD_X86INTRIN | |
630 | ||
/**
 * splits @p __v into exponent and mantissa, the sign is kept with the mantissa
 *
 * The return value will be in the range [0.5, 1.0[
 * The @p __e value will be an integer defining the power-of-two exponent
 */
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
        // Single element: defer to the scalar std::frexp.
        int __tmp;
        const auto __r = std::frexp(__x[0], &__tmp);
        (*__exp)[0] = __tmp;
        return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (__have_avx512f)
      {
        // AVX512 path: vgetexp/vgetmant do the split in hardware; zero
        // elements must be special-cased (blended through) since getexp/
        // getmant are only meaningful for nonzero values.
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        constexpr size_t _NI = _Np < 4 ? 4 : _Np; // min width for int vector
        const auto __v = __data(__x);
        const auto __isnonzero
          = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
        const _SimdWrapper<int, _NI> __exp_plus1
          = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
        // Exponent is 0 where the value is zero, getexp+1 elsewhere.
        const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
          _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
                                      _SimdWrapper<int, _NI>(), __exp_plus1));
        simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
        // Mantissa is the input itself where zero, getmant elsewhere.
        return {__private_init,
                _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
                                              __isnonzero),
                                            __v, __getmant_avx512(__v))};
      }
#endif // _GLIBCXX_SIMD_X86INTRIN
    else
      {
        // fallback implementation
        static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
        using _V = simd<_Tp, _Abi>;
        using _IV = rebind_simd_t<int, _V>;
        using namespace std::experimental::__proposed;
        using namespace std::experimental::__float_bitwise_operators;

        // Exponent bias adjustment and the extra offset/scale used to
        // renormalize subnormal inputs (float: scale by 2^112, double: 2^512).
        constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
        constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
        constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
          = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
          = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

        // Force the exponent field to the [.5, 1) pattern, keep sign+mantissa.
        _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
        const _IV __exponent_bits = __extract_exponent_as_int(__x);
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
          {
            // Common case: all normal, just unbias the exponent field.
            *__exp
              = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
            return __mant;
          }

#if __FINITE_MATH_ONLY__
        // at least one element of __x is 0 or subnormal, the rest is normal
        // (inf and NaN are excluded by -ffinite-math-only)
        const auto __iszero_inf_nan = __x == 0;
#else
        // Identify zero, inf, and NaN via the integer representation of |x|.
        using _Ip = __int_for_sizeof_t<_Tp>;
        const auto __as_int = simd_bit_cast<rebind_simd_t<_Ip, _V>>(abs(__x));
        const auto __inf = simd_bit_cast<rebind_simd_t<_Ip, _V>>(_V(__infinity_v<_Tp>));
        const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
          __as_int == 0 || __as_int >= __inf);
#endif

        // Renormalize subnormals by scaling, then redo the mantissa/exponent
        // extraction for those lanes; zero/inf/NaN pass through unchanged.
        const _V __scaled_subnormal = __x * __subnorm_scale;
        const _V __mant_subnormal
          = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
        where(!isnormal(__x), __mant) = __mant_subnormal;
        where(__iszero_inf_nan, __mant) = __x;
        _IV __e = __extract_exponent_as_int(__scaled_subnormal);
        using _MaskType =
          typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
                                 _V, _IV>::mask_type;
        const _MaskType __value_isnormal = isnormal(__x).__cvt();
        where(__value_isnormal.__cvt(), __e) = __exponent_bits;
        static_assert(sizeof(_IV) == sizeof(__value_isnormal));
        // Bias correction: plain adjust for normal lanes; adjust + subnormal
        // scaling offset for lanes that were nonzero with zero exponent field.
        const _IV __offset
          = (simd_bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
            | (simd_bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
                                  & static_simd_cast<_MaskType>(__x != 0))
               & _IV(__exp_adjust + __exp_offset));
        *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
        return __mant;
      }
  }
729 | ||
730 | // }}} | |
// Each _GLIBCXX_SIMD_MATH_CALL*_ invocation generates the simd overload of
// the named <cmath> function, applied element-wise (or forwarded to an
// ABI-specific vectorized implementation where one exists).
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)

// logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)
739 | ||
740 | //}}} | |
741 | // logb{{{ | |
// logb: element-wise unbiased exponent of __x, returned as floating point.
// Dispatches to scalar std::logb for size-1, to the per-chunk implementation
// for fixed-size ABIs, to AVX-512 getexp instructions where available, and
// otherwise falls back to generic exponent-field extraction with explicit
// handling of zero/NaN/inf/subnormal corner cases.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_logb(__data(__x))};
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    // vgetexp* directly computes logb (incl. corner cases) in hardware.
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
              __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__proposed;
        auto __is_normal = isnormal(__x);

        // work on abs(__x) to reflect the return value on Linux for negative
        // inputs (domain-error => implementation-defined value is returned)
        const _V abs_x = abs(__x);

        // __exponent(__x) returns the exponent value (bias removed) as
        // simd<_Up> with integral _Up
        auto&& __exponent = [](const _V& __v) {
          using namespace std::experimental::__proposed;
          using _IV = rebind_simd_t<
            conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
          // Shift the mantissa bits out; the remaining biased exponent minus
          // the bias (max_exponent - 1) is the unbiased exponent.
          return (simd_bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
                 - (__max_exponent_v<_Tp> - 1);
        };
        _V __r = static_simd_cast<_V>(__exponent(abs_x));
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
          // without corner cases (nan, inf, subnormal, zero) we have our
          // answer:
          return __r;
        const auto __is_zero = __x == 0;
        const auto __is_nan = isnan(__x);
        const auto __is_inf = isinf(__x);
        // C99/Annex F mandated special values:
        where(__is_zero, __r) = -__infinity_v<_Tp>;
        where(__is_nan, __r) = __x;
        where(__is_inf, __r) = __infinity_v<_Tp>;
        __is_normal |= __is_zero || __is_nan || __is_inf;
        if (all_of(__is_normal))
          // at this point everything but subnormals is handled
          return __r;
        // subnormals repeat the exponent extraction after multiplication of the
        // input with __a floating point value that has 112 (0x70) in its exponent
        // (not too big for sp and large enough for dp)
        const _V __scaled = abs_x * _Tp(0x1p112);
        _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
        where(__is_normal, __scaled_exp) = __r;
        return __scaled_exp;
      }
  }
815 | ||
816 | //}}} | |
817 | template <typename _Tp, typename _Abi> | |
818 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
819 | modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr) | |
820 | { | |
62a989ea | 821 | if constexpr (simd_size_v<_Tp, _Abi> == 1) |
2bcceb6f MK |
822 | { |
823 | _Tp __tmp; | |
824 | _Tp __r = std::modf(__x[0], &__tmp); | |
825 | __iptr[0] = __tmp; | |
826 | return __r; | |
827 | } | |
828 | else | |
829 | { | |
830 | const auto __integral = trunc(__x); | |
831 | *__iptr = __integral; | |
832 | auto __r = __x - __integral; | |
833 | #if !__FINITE_MATH_ONLY__ | |
834 | where(isinf(__x), __r) = _Tp(); | |
835 | #endif | |
836 | return copysign(__r, __x); | |
837 | } | |
838 | } | |
839 | ||
// Generated element-wise simd overloads of the scalar <cmath> functions.
_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)
847 | ||
// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
// Element-wise absolute value for signed integral simd; forwards to the
// ABI-specific implementation.  (Unsigned _Tp is excluded by SFINAE since
// abs on unsigned values is meaningless.)
template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
854 | ||
2bcceb6f MK |
// _GLIBCXX_SIMD_CVTING2(f): adds overloads of the binary function f where
// one argument is deduced as simd<_Tp, _Abi> and the other is merely
// convertible to it (__type_identity_t suppresses deduction), so that mixed
// calls like f(v, 1) and f(1, v) resolve.  Each overload forwards to the
// base overload after the implicit conversion has happened at the call
// boundary.
#define _GLIBCXX_SIMD_CVTING2(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
  { \
    return _NAME(__x, __y); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
  { \
    return _NAME(__x, __y); \
  }
869 | ||
// _GLIBCXX_SIMD_CVTING3(f): same idea as _GLIBCXX_SIMD_CVTING2 but for
// ternary functions — all six combinations with at least one deduced
// simd<_Tp, _Abi> argument and the rest convertible-to-simd are covered.
#define _GLIBCXX_SIMD_CVTING3(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, \
    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  }
918 | ||
// Applies __apply chunk-wise over fixed-size simd arguments: the fixed-size
// data is a tuple of native-ABI chunks, and _M_apply_per_chunk invokes the
// lambda once per chunk, rebuilding each chunk as a native simd (_V) so that
// __apply can use the ordinary simd math overloads on it.  Returns an _R
// (a fixed_size simd) assembled from the per-chunk results.
template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
                     const _Tps&... __args)
  {
    return {__private_init,
            __data(__arg0)._M_apply_per_chunk(
              [&](auto __impl, const auto&... __inner) {
                // __impl identifies the chunk's native ABI; its simd_type is
                // the chunk's simd flavor.
                using _V = typename decltype(__impl)::simd_type;
                return __data(__apply(_V(__private_init, __inner)...));
              },
              __data(__args)...)};
  }
932 | ||
// Two-argument hypot kernel: computes sqrt(x² + y²) element-wise without
// undue overflow/underflow in the intermediate terms.  Strategy: scale both
// inputs by a power of two derived from the larger magnitude (so the sum of
// squares stays in range), take the sqrt, and multiply the exponent back in.
// _VV is const _V& for fixed-size ABIs (avoids copies) and _V otherwise.
template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
        // Recurse per native chunk.
        return __fixed_size_apply<_V>([](auto __a,
                                         auto __b) { return hypot(__a, __b); },
                                      __x, __y);
      }
    else
      {
        // A simple solution for _Tp == float would be to cast to double and
        // simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with
        // dp. It still needs the Annex F fixups though and isn't faster on
        // Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for
        // AVX-512).
        using namespace __float_bitwise_operators;
        using namespace __proposed;
        _V __absx = abs(__x);          // no error
        _V __absy = abs(__y);          // no error
        _V __hi = max(__absx, __absy); // no error
        _V __lo = min(__absy, __absx); // no error

        // round __hi down to the next power-of-2:
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
        if constexpr (__have_neon && !__have_neon_a32)
          { // With ARMv7 NEON, we have no subnormals and must use slightly
            // different strategy
            const _V __hi_exp = __hi & __inf;
            _V __scale_back = __hi_exp;
            // For large exponents (max & max/2) the inversion comes too close
            // to subnormals. Subtract 3 from the exponent:
            where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
            // Invert and adjust for the off-by-one error of inversion via xor:
            const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
            const _V __h1 = __hi * __scale;
            const _V __l1 = __lo * __scale;
            _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
            // Fix up hypot(0, 0) to not be NaN:
            where(__hi == 0, __r) = 0;
            return __r;
          }
#endif

#ifdef __FAST_MATH__
        // With fast-math, ignore precision of subnormals and inputs from
        // __finite_max_v/2 to __finite_max_v. This removes all
        // branching/masking.
        if constexpr (true)
#else
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                      && all_of(isnormal(__y))))
#endif
          {
            const _V __hi_exp = __hi & __inf;
            //((__hi + __hi) & __inf) ^ __inf almost works for computing
            //__scale,
            // except when (__hi + __hi) & __inf == __inf, in which case __scale
            // becomes 0 (should be min/2 instead) and thus loses the
            // information from __lo.
#ifdef __FAST_MATH__
            using _Ip = __int_for_sizeof_t<_Tp>;
            using _IV = rebind_simd_t<_Ip, _V>;
            const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
            const _V __scale
              = simd_bit_cast<_V>(2 * simd_bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
            // xor flips the exponent bits; * .5 corrects the off-by-one.
            const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
            // Mask that selects exactly the mantissa bits of a normal value.
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
              = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
            // __h1 = mantissa of __hi with exponent forced to 0 => in [1, 2).
            const _V __h1 = (__hi & __mant_mask) | _V(1);
            const _V __l1 = __lo * __scale;
            return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
          }
        else
          {
            // slower path to support subnormals
            // if __hi is subnormal, avoid scaling by inf & final mul by 0
            // (which yields NaN) by using min()
            _V __scale = _V(1 / __norm_min_v<_Tp>);
            // invert exponent w/o error and w/o using the slow divider unit:
            // xor inverts the exponent but off by 1. Multiplication with .5
            // adjusts for the discrepancy.
            where(__hi >= __norm_min_v<_Tp>, __scale)
              = ((__hi & __inf) ^ __inf) * _Tp(.5);
            // adjust final exponent for subnormal inputs
            _V __hi_exp = __norm_min_v<_Tp>;
            where(__hi >= __norm_min_v<_Tp>, __hi_exp)
              = __hi & __inf; // no error
            _V __h1 = __hi * __scale; // no error
            _V __l1 = __lo * __scale; // no error

            // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
            // this ensures no overflow in the argument to sqrt
            _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            // the naive fixup goes like this:
            //
            // where(__l1 == 0, __r) = __hi;
            // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
            // where(isinf(__absx) || isinf(__absy), __r) = __inf;
            //
            // The fixup can be prepared in parallel with the sqrt, requiring a
            // single blend step after hi_exp * sqrt, reducing latency and
            // throughput:
            _V __fixup = __hi; // __lo == 0
            where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
            where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
            where(!(__lo == 0 || isunordered(__x, __y)
                      || (isinf(__absx) || isinf(__absy))),
                  __fixup)
              = __r;
            __r = __fixup;
#endif
            return __r;
          }
      }
  }
1060 | ||
1061 | template <typename _Tp, typename _Abi> | |
1062 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> | |
1063 | hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y) | |
1064 | { | |
1065 | return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>, | |
1066 | const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x, | |
1067 | __y); | |
1068 | } | |
1069 | ||
1070 | _GLIBCXX_SIMD_CVTING2(hypot) | |
1071 | ||
// Three-argument hypot kernel: sqrt(x² + y² + z²) element-wise without undue
// overflow/underflow in intermediates.  Same scaling strategy as the
// two-argument overload; additionally orders the operands so the two smaller
// squares are summed first (better accuracy).
template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y, _VV __z)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Abi = typename _V::abi_type;
    using _Tp = typename _V::value_type;
    /* FIXME: enable after PR77776 is resolved
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
    else
    */
    if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
      {
        return __fixed_size_apply<simd<_Tp, _Abi>>(
          [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
          __x, __y, __z);
      }
    else
      {
        using namespace __float_bitwise_operators;
        using namespace __proposed;
        const _V __absx = abs(__x);                 // no error
        const _V __absy = abs(__y);                 // no error
        const _V __absz = abs(__z);                 // no error
        _V __hi = max(max(__absx, __absy), __absz); // no error
        _V __l0 = min(__absz, max(__absx, __absy)); // no error
        _V __l1 = min(__absy, __absx);              // no error
        if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
                        && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
          { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
            // NaN. In this case the bit-tricks don't work, they require IEC559
            // binary32 or binary64 format.
#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
              return __infinity_v<_Tp>;
            else if (isunordered(__absx[0], __absy[0] + __absz[0]))
              return __quiet_NaN_v<_Tp>;
            else if (__l0[0] == 0 && __l1[0] == 0)
              return __hi;
#endif
            // Build a power-of-2 with __hi's exponent by overwriting the
            // 64-bit mantissa word with its implicit leading 1 (bit 63).
            _V __hi_exp = __hi;
            const _ULLong __tmp = 0x8000'0000'0000'0000ull;
            __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
            const _V __scale = 1 / __hi_exp;
            __hi *= __scale;
            __l0 *= __scale;
            __l1 *= __scale;
            return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
          }
        else
          {
            // round __hi down to the next power-of-2:
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
            if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
              { // With ARMv7 NEON, we have no subnormals and must use slightly
                // different strategy
                const _V __hi_exp = __hi & __inf;
                _V __scale_back = __hi_exp;
                // For large exponents (max & max/2) the inversion comes too
                // close to subnormals. Subtract 3 from the exponent:
                where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
                // Invert and adjust for the off-by-one error of inversion via
                // xor:
                const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
                const _V __h1 = __hi * __scale;
                __l0 *= __scale;
                __l1 *= __scale;
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                asm("" : "+m"(__lo));
                _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
                // Fix up hypot(0, 0, 0) to not be NaN:
                where(__hi == 0, __r) = 0;
                return __r;
              }
#endif

#ifdef __FAST_MATH__
            // With fast-math, ignore precision of subnormals and inputs from
            // __finite_max_v/2 to __finite_max_v. This removes all
            // branching/masking.
            if constexpr (true)
#else
            if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                          && all_of(isnormal(__y))
                                          && all_of(isnormal(__z))))
#endif
              {
                const _V __hi_exp = __hi & __inf;
                //((__hi + __hi) & __inf) ^ __inf almost works for computing
                //__scale, except when (__hi + __hi) & __inf == __inf, in which
                // case __scale
                // becomes 0 (should be min/2 instead) and thus loses the
                // information from __lo.
#ifdef __FAST_MATH__
                using _Ip = __int_for_sizeof_t<_Tp>;
                using _IV = rebind_simd_t<_Ip, _V>;
                const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
                const _V __scale
                  = simd_bit_cast<_V>(2 * simd_bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
                const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
                constexpr _Tp __mant_mask
                  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
                const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
                __l0 *= __scale;
                __l1 *= __scale;
                const _V __lo
                  = __l0 * __l0
                      + __l1 * __l1; // add the two smaller values first
                return __hi_exp * sqrt(__lo + __h1 * __h1);
              }
            else
              {
                // slower path to support subnormals
                // if __hi is subnormal, avoid scaling by inf & final mul by 0
                // (which yields NaN) by using min()
                _V __scale = _V(1 / __norm_min_v<_Tp>);
                // invert exponent w/o error and w/o using the slow divider
                // unit: xor inverts the exponent but off by 1. Multiplication
                // with .5 adjusts for the discrepancy.
                where(__hi >= __norm_min_v<_Tp>, __scale)
                  = ((__hi & __inf) ^ __inf) * _Tp(.5);
                // adjust final exponent for subnormal inputs
                _V __hi_exp = __norm_min_v<_Tp>;
                where(__hi >= __norm_min_v<_Tp>, __hi_exp)
                  = __hi & __inf; // no error
                _V __h1 = __hi * __scale; // no error
                __l0 *= __scale; // no error
                __l1 *= __scale; // no error
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
                // fixup for Annex F requirements
                _V __fixup = __hi; // __lo == 0
                // where(__lo == 0, __fixup) = __hi;
                where(isunordered(__x, __y + __z), __fixup)
                  = __quiet_NaN_v<_Tp>;
                where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
                  = __inf;
                // Instead of __lo == 0, the following could depend on __h1² ==
                // __h1² + __lo (i.e. __hi is so much larger than the other two
                // inputs that the result is exactly __hi). While this may
                // improve precision, it is likely to reduce efficiency if the
                // ISA has FMAs (because __h1² + __lo is an FMA, but the
                // intermediate
                // __h1² must be kept)
                where(!(__lo == 0 || isunordered(__x, __y + __z)
                          || isinf(__absx) || isinf(__absy) || isinf(__absz)),
                      __fixup)
                  = __r;
                __r = __fixup;
#endif
                return __r;
              }
          }
      }
  }
1236 | ||
1237 | template <typename _Tp, typename _Abi> | |
1238 | _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> | |
1239 | hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, | |
1240 | const simd<_Tp, _Abi>& __z) | |
1241 | { | |
1242 | return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>, | |
1243 | const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x, | |
1244 | __y, | |
1245 | __z); | |
1246 | } | |
1247 | ||
1248 | _GLIBCXX_SIMD_CVTING3(hypot) | |
1249 | ||
// Generated element-wise simd overloads of the scalar <cmath> functions.
_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
1273 | ||
1274 | template <typename _Tp, typename _Abi> | |
1275 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
1276 | copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y) | |
1277 | { | |
1278 | if constexpr (simd_size_v<_Tp, _Abi> == 1) | |
1279 | return std::copysign(__x[0], __y[0]); | |
0237aa8c MK |
1280 | else if constexpr (__is_fixed_size_abi_v<_Abi>) |
1281 | return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))}; | |
2bcceb6f MK |
1282 | else |
1283 | { | |
1284 | using _V = simd<_Tp, _Abi>; | |
1285 | using namespace std::experimental::__float_bitwise_operators; | |
1286 | _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1); | |
0237aa8c | 1287 | return (__x & ~__signmask) | (__y & __signmask); |
2bcceb6f MK |
1288 | } |
1289 | } | |
1290 | ||
// Generated element-wise simd overloads of the scalar <cmath> functions.
_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)
1301 | ||
1302 | // isnan and isinf require special treatment because old glibc may declare | |
1303 | // `int isinf(double)`. | |
// Element-wise isinf, written out by hand (instead of via the macro) because
// old glibc may declare a conflicting `int isinf(double)`; see the comment
// preceding this overload.  Returns the ABI's mask type.
template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }
1309 | ||
// Element-wise isnan; hand-written for the same old-glibc declaration-clash
// reason as isinf above.  Returns the ABI's mask type.
template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }
1315 | ||
// isnormal has no old-glibc declaration clash, so the generic macro suffices.
_GLIBCXX_SIMD_MATH_CALL_(isnormal)
1317 | ||
1318 | template <typename..., typename _Tp, typename _Abi> | |
1319 | simd_mask<_Tp, _Abi> | |
1320 | signbit(simd<_Tp, _Abi> __x) | |
1321 | { | |
1322 | if constexpr (is_integral_v<_Tp>) | |
1323 | { | |
1324 | if constexpr (is_unsigned_v<_Tp>) | |
1325 | return simd_mask<_Tp, _Abi>{}; // false | |
1326 | else | |
1327 | return __x < 0; | |
1328 | } | |
1329 | else | |
1330 | return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))}; | |
1331 | } | |
1332 | ||
// Quiet (non-signaling on NaN) element-wise comparison overloads.
_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)
1339 | ||
1340 | /* not covered in [parallel.simd.math] | |
1341 | template <typename _Abi> __doublev<_Abi> nan(const char* tagp); | |
1342 | template <typename _Abi> __floatv<_Abi> nanf(const char* tagp); | |
1343 | template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp); | |
1344 | ||
1345 | template <typename _V> struct simd_div_t { | |
1346 | _V quot, rem; | |
1347 | }; | |
1348 | ||
1349 | template <typename _Abi> | |
1350 | simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer, | |
1351 | _SCharv<_Abi> denom); | |
1352 | template <typename _Abi> | |
1353 | simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer, | |
1354 | __shortv<_Abi> denom); | |
1355 | template <typename _Abi> | |
1356 | simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom); | |
1357 | template <typename _Abi> | |
1358 | simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer, | |
1359 | __longv<_Abi> denom); | |
1360 | template <typename _Abi> | |
1361 | simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, | |
1362 | __llongv<_Abi> denom); | |
1363 | */ | |
1364 | ||
1365 | // special math {{{ | |
1366 | template <typename _Tp, typename _Abi> | |
1367 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
1368 | assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, | |
1369 | const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, | |
1370 | const simd<_Tp, _Abi>& __x) | |
1371 | { | |
1372 | return simd<_Tp, _Abi>([&](auto __i) { | |
1373 | return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]); | |
1374 | }); | |
1375 | } | |
1376 | ||
1377 | template <typename _Tp, typename _Abi> | |
1378 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
1379 | assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, | |
1380 | const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, | |
1381 | const simd<_Tp, _Abi>& __x) | |
1382 | { | |
1383 | return simd<_Tp, _Abi>([&](auto __i) { | |
1384 | return std::assoc_legendre(__n[__i], __m[__i], __x[__i]); | |
1385 | }); | |
1386 | } | |
1387 | ||
// Generated element-wise simd overloads of the C++17 special math functions.
_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)
1400 | ||
1401 | template <typename _Tp, typename _Abi> | |
1402 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
1403 | hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, | |
1404 | const simd<_Tp, _Abi>& __x) | |
1405 | { | |
1406 | return simd<_Tp, _Abi>( | |
1407 | [&](auto __i) { return std::hermite(__n[__i], __x[__i]); }); | |
1408 | } | |
1409 | ||
1410 | template <typename _Tp, typename _Abi> | |
1411 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
1412 | laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, | |
1413 | const simd<_Tp, _Abi>& __x) | |
1414 | { | |
1415 | return simd<_Tp, _Abi>( | |
1416 | [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); }); | |
1417 | } | |
1418 | ||
1419 | template <typename _Tp, typename _Abi> | |
1420 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
1421 | legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, | |
1422 | const simd<_Tp, _Abi>& __x) | |
1423 | { | |
1424 | return simd<_Tp, _Abi>( | |
1425 | [&](auto __i) { return std::legendre(__n[__i], __x[__i]); }); | |
1426 | } | |
1427 | ||
// Element-wise Riemann zeta function, generated by the generic macro.
_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
1429 | ||
1430 | template <typename _Tp, typename _Abi> | |
1431 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
1432 | sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, | |
1433 | const simd<_Tp, _Abi>& __x) | |
1434 | { | |
1435 | return simd<_Tp, _Abi>( | |
1436 | [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); }); | |
1437 | } | |
1438 | ||
1439 | template <typename _Tp, typename _Abi> | |
1440 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
1441 | sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l, | |
1442 | const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m, | |
1443 | const simd<_Tp, _Abi>& theta) | |
1444 | { | |
1445 | return simd<_Tp, _Abi>([&](auto __i) { | |
1446 | return std::assoc_legendre(__l[__i], __m[__i], theta[__i]); | |
1447 | }); | |
1448 | } | |
1449 | ||
1450 | template <typename _Tp, typename _Abi> | |
1451 | enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>> | |
1452 | sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n, | |
1453 | const simd<_Tp, _Abi>& __x) | |
1454 | { | |
1455 | return simd<_Tp, _Abi>( | |
1456 | [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); }); | |
1457 | } | |
1458 | // }}} | |
1459 | ||
62a989ea MK |
1460 | #undef _GLIBCXX_SIMD_CVTING2 |
1461 | #undef _GLIBCXX_SIMD_CVTING3 | |
2bcceb6f MK |
1462 | #undef _GLIBCXX_SIMD_MATH_CALL_ |
1463 | #undef _GLIBCXX_SIMD_MATH_CALL2_ | |
1464 | #undef _GLIBCXX_SIMD_MATH_CALL3_ | |
1465 | ||
1466 | _GLIBCXX_SIMD_END_NAMESPACE | |
1467 | ||
1468 | #endif // __cplusplus >= 201703L | |
1469 | #endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_ | |
1470 | ||
1471 | // vim: foldmethod=marker sw=2 ts=8 noet sts=2 |