// Math overloads for simd -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

#if __cplusplus >= 201703L

#include <utility>
#include <iomanip>

_GLIBCXX_SIMD_BEGIN_NAMESPACE
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;

// _Math_return_type {{{
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t =
    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };

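// Illustration (not part of the implementation): the scalar return type of
// the wrapped std:: function, evaluated for double arguments, selects the
// simd return type. Assuming an _Abi with 4 lanes:
//   _Math_return_type_t<double, float, _Abi> -> simd<float, _Abi>
//   _Math_return_type_t<bool,   float, _Abi> -> simd_mask<float, _Abi>
//   _Math_return_type_t<int,    float, _Abi> -> fixed_size_simd<int, 4>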
//}}}
// _GLIBCXX_SIMD_MATH_CALL_ {{{
#define _GLIBCXX_SIMD_MATH_CALL_(__name) \
template <typename _Tp, typename _Abi, typename..., \
	  typename _R = _Math_return_type_t< \
	    decltype(std::__name(declval<double>())), _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(simd<_Tp, _Abi> __x) \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }

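// For example, _GLIBCXX_SIMD_MATH_CALL_(exp) roughly expands to (sketch):
//   template <typename _Tp, typename _Abi, typename...,
//             typename _R = _Math_return_type_t<
//               decltype(std::exp(declval<double>())), _Tp, _Abi>>
//     enable_if_t<is_floating_point_v<_Tp>, _R>
//     exp(simd<_Tp, _Abi> __x)
//     { return {__private_init, _Abi::_SimdImpl::_S_exp(__data(__x))}; }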
// }}}
//_Extra_argument_type{{{
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

//}}}
// _GLIBCXX_SIMD_MATH_CALL2_ {{{
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, __arg2) \
template < \
  typename _Tp, typename _Abi, typename..., \
  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \
  typename _R = _Math_return_type_t< \
    decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y) \
  { \
    return {__private_init, \
            _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))}; \
  } \
template <typename _Up, typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t< \
    decltype(std::__name( \
      declval<double>(), \
      declval<enable_if_t< \
        conjunction_v< \
          is_same<__arg2, _Tp>, \
          negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>, \
          is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>, \
        double>>())), \
    _Tp, _Abi> \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy) \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }

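// For example, _GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp) declares
// pow(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) plus an overload that
// converts a non-simd first argument, so that (sketch)
//   pow(2.f, __y) == pow(simd<float, _Abi>(2.f), __y)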
// }}}
// _GLIBCXX_SIMD_MATH_CALL3_ {{{
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, __arg2, __arg3) \
template <typename _Tp, typename _Abi, typename..., \
	  typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>, \
	  typename _Arg3 = _Extra_argument_type<__arg3, _Tp, _Abi>, \
	  typename _R = _Math_return_type_t< \
	    decltype(std::__name(declval<double>(), _Arg2::declval(), \
				 _Arg3::declval())), \
	    _Tp, _Abi>> \
  enable_if_t<is_floating_point_v<_Tp>, _R> \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y, \
	 const typename _Arg3::type& __z) \
  { \
    return {__private_init, \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y), \
					 _Arg3::_S_data(__z))}; \
  } \
template < \
  typename _T0, typename _T1, typename _T2, typename..., \
  typename _U0 = __remove_cvref_t<_T0>, \
  typename _U1 = __remove_cvref_t<_T1>, \
  typename _U2 = __remove_cvref_t<_T2>, \
  typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>, \
  typename = enable_if_t<conjunction_v< \
    is_simd<_Simd>, is_convertible<_T0&&, _Simd>, \
    is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>, \
    negation<conjunction< \
      is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>> \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(), \
					  declval<const _Simd&>(), \
					  declval<const _Simd&>())) \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz) \
  { \
    return __name(_Simd(static_cast<_T0&&>(__xx)), \
		  _Simd(static_cast<_T1&&>(__yy)), \
		  _Simd(static_cast<_T2&&>(__zz))); \
  }

// }}}
// __cosSeries {{{
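// The __cosSeries/__sinSeries helpers evaluate Taylor polynomials with tuned
// coefficients (the hex float literals below) in Horner form; they assume
// the argument was already folded into [-π/4, π/4] by __fold_input.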
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  //  1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __cosSeries(const simd<double, _Abi>& __x)
  {
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = 0x1.AC00000000000p-45;              //  1/16!
    __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
    __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
    __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
    __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
    return (__y * __x2 - .5f) * __x2 + 1.f;
  }

// }}}
// __sinSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
    return __y * (__x2 * __x) + __x;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x  = [0, 0.7854 = π/4]
    // __x² = [0, 0.6169 = π²/16]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
    return __y * (__x2 * __x) + __x;
  }

// }}}
// __zero_low_bits {{{
template <int _Bits, typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  __zero_low_bits(simd<_Tp, _Abi> __x)
  {
    const simd<_Tp, _Abi> __bitmask
      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
    return {__private_init,
            _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
  }
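// E.g. __zero_low_bits<26>(__y) clears the 26 least significant bits of each
// element's bit pattern, so that products such as __y_hi * __pi_2_1 below
// stay exact.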

// }}}
// __fold_input {{{

/**@internal
 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
 * quadrant 0: [-¼π,  ¼π]
 * quadrant 1: [ ¼π,  ¾π]
 * quadrant 2: [ ¾π, 1¼π]
 * quadrant 3: [1¼π, 1¾π]
 *
 * The algorithm determines `y` as the multiple for which
 * `x - y * ¼π ∈ [-¼π, ¼π]`. Using a bitmask, `y` is reduced to `quadrant`.
 * `y` can be calculated as
 * ```
 * y = trunc(x / ¼π);
 * y += fmod(y, 2);
 * ```
 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision
 * allows, `2/π * x` is better (faster).
 */
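// A minimal scalar sketch of the folding step (illustrative only; the
// vectorized implementations follow below):
//   double __fold(double __x, int* __quadrant)
//   {
//     const double __y = std::nearbyint(__x * (2 / M_PI)); // round(x / ½π)
//     *__quadrant = static_cast<int>(__y) & 3;             // y mod 4
//     return __x - __y * (M_PI / 2); // in [-¼π, ¼π], up to rounding error
//   }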
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
  };

namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0; // π/2
} // namespace __math_double

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x);
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make
    // the following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x = __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3;
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
        const _V __y = nearbyint(__r._M_x * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
        __r._M_x -= __y * __pi_2_5bits0;
        __r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
        using __math_double::__2_over_pi;
        using __math_double::__pi_2;
        using _VD = rebind_simd_t<double, _V>;
        _VD __xd = static_simd_cast<_VD>(__r._M_x);
        _VD __y = nearbyint(__xd * __2_over_pi);
        __r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
        __r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
        __r._M_quadrant = 0;
        return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
        // x - y * pi/2, y uses no more than 11 mantissa bits
        __r._M_x -= __y * 0x1.921FB54443000p0;
        __r._M_x -= __y * -0x1.73DCB3B39A000p-43;
        __r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
        // x - y * pi/2, y uses no more than 29 mantissa bits
        __r._M_x -= __y * 0x1.921FB40000000p0;
        __r._M_x -= __y * 0x1.4442D00000000p-24;
        __r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
        // x - y * pi/2, y may require all mantissa bits
        const _V __y_hi = __zero_low_bits<26>(__y);
        const _V __y_lo = __y - __y_hi;
        const auto __pi_2_1 = 0x1.921FB50000000p0;
        const auto __pi_2_2 = 0x1.110B460000000p-26;
        const auto __pi_2_3 = 0x1.1A62630000000p-54;
        const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
        __r._M_x = __r._M_x - __y_hi * __pi_2_1
                     - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
                     - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
                     - max(__y * __pi_2_4, __y_lo * __pi_2_3)
                     - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }

// }}}
// __extract_exponent_as_int {{{
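// Returns the (biased) exponent field of each element of __v as simd<int>:
// mask with the exponent bits, bit-cast to integers, shift out the mantissa.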
template <typename _Tp, typename _Abi>
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    using namespace std::experimental::__proposed;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
      simd_bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
      >> (__digits_v<_Tp> - 1));
  }

// }}}
// __impl_or_fallback {{{
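// Classic int/float overload-ranking dispatch: if __impl_fun(__args...) is
// well-formed, the int overload wins (exact match); otherwise the implicit
// int -> float conversion selects the fallback.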
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
                              _Args&&... __args)
  -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

template <typename ImplFun, typename FallbackFun, typename... _Args>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
                              _Args&&... __args)
  -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
//}}}

// trigonometric functions {{{
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)

/*
 * algorithm for sine and cosine:
 *
 * The result can be calculated with sine or cosine depending on the π/4
 * section the input is in.
 * sine   ≈ __x + __x³
 * cosine ≈ 1 - __x²
 *
 * sine:
 * Map -__x to __x and invert the output
 * Extend precision of __x - n * π/4 by calculating
 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
 *
 * Calculate Taylor series with tuned coefficients.
 * Fix sign.
 */
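// Usage is element-wise over any floating-point simd (illustrative):
//   std::experimental::native_simd<float> __v = ...;
//   auto __c = cos(__v); // same type as __v, cosine per lane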
// cos{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
            return static_simd_cast<_V>(
              cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | cosSeries, +
        //        1 | sinSeries, -
        //        2 | cosSeries, -
        //        3 | sinSeries, +
        using namespace std::experimental::__float_bitwise_operators;
        const _V __sign_flip
          = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

        const auto __need_cos = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else // some_of(__need_cos)
          {
            _V __r = __sinSeries(__f._M_x);
            where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
            return __r ^ __sign_flip;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  cos(simd<_Tp, simd_abi::scalar> __x)
  { return std::cos(__data(__x)); }

//}}}
// sin{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
        if constexpr (is_same_v<_Tp, float>)
          if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
            return static_simd_cast<_V>(
              sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

        const auto __f = __fold_input(__x);
        // quadrant | effect
        //        0 | sinSeries
        //        1 | cosSeries
        //        2 | sinSeries, sign flip
        //        3 | cosSeries, sign flip
        using namespace std::experimental::__float_bitwise_operators;
        const auto __sign_flip
          = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

        const auto __need_sin = (__f._M_quadrant & 1) == 0;
        if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
          return __sign_flip ^ __sinSeries(__f._M_x);
        else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
          return __sign_flip ^ __cosSeries(__f._M_x);
        else // some_of(__need_sin)
          {
            _V __r = __cosSeries(__f._M_x);
            where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
            return __sign_flip ^ __r;
          }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  sin(simd<_Tp, simd_abi::scalar> __x)
  { return std::sin(__data(__x)); }

//}}}
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
// }}}
// exponential functions {{{
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)

// }}}
// frexp {{{
#if _GLIBCXX_SIMD_X86INTRIN
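// __getexp/__getmant_avx512 wrap the AVX-512 vgetexp/vgetmant instructions,
// picking the narrowest available variant for the given vector width and
// falling back to the 512-bit form when AVX512VL is unavailable.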
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }

template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
                                           _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
                                              _MM_MANT_NORM_p5_1,
                                              _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
                                       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
#endif // _GLIBCXX_SIMD_X86INTRIN

/**
 * Splits @p __x into exponent and mantissa; the sign is kept with the
 * mantissa.
 *
 * The return value will be in the range [0.5, 1.0).
 * The @p __exp value will be an integer defining the power-of-two exponent.
 */
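// Usage (illustrative):
//   simd<float, _Abi> __v = ...;
//   _Samesize<int, simd<float, _Abi>> __e;
//   simd<float, _Abi> __m = frexp(__v, &__e); // __v == __m * 2^__e per lane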
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
        int __tmp;
        const auto __r = std::frexp(__x[0], &__tmp);
        (*__exp)[0] = __tmp;
        return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (__have_avx512f)
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        constexpr size_t _NI = _Np < 4 ? 4 : _Np;
        const auto __v = __data(__x);
        const auto __isnonzero
          = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
        const _SimdWrapper<int, _NI> __exp_plus1
          = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
        const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
          _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
                                      _SimdWrapper<int, _NI>(), __exp_plus1));
        simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
        return {__private_init,
                _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
                                              __isnonzero),
                                            __v, __getmant_avx512(__v))};
      }
#endif // _GLIBCXX_SIMD_X86INTRIN
    else
      {
        // fallback implementation
        static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
        using _V = simd<_Tp, _Abi>;
        using _IV = rebind_simd_t<int, _V>;
        using namespace std::experimental::__proposed;
        using namespace std::experimental::__float_bitwise_operators;

        constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
        constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
        constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
          = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
          = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

        _V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
        const _IV __exponent_bits = __extract_exponent_as_int(__x);
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
          {
            *__exp
              = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
            return __mant;
          }

#if __FINITE_MATH_ONLY__
        // at least one element of __x is 0 or subnormal, the rest is normal
        // (inf and NaN are excluded by -ffinite-math-only)
        const auto __iszero_inf_nan = __x == 0;
#else
        using _Ip = __int_for_sizeof_t<_Tp>;
        const auto __as_int = simd_bit_cast<rebind_simd_t<_Ip, _V>>(abs(__x));
        const auto __inf = simd_bit_cast<rebind_simd_t<_Ip, _V>>(_V(__infinity_v<_Tp>));
        const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
          __as_int == 0 || __as_int >= __inf);
#endif

        const _V __scaled_subnormal = __x * __subnorm_scale;
        const _V __mant_subnormal
          = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
        where(!isnormal(__x), __mant) = __mant_subnormal;
        where(__iszero_inf_nan, __mant) = __x;
        _IV __e = __extract_exponent_as_int(__scaled_subnormal);
        using _MaskType =
          typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
                                 _V, _IV>::mask_type;
        const _MaskType __value_isnormal = isnormal(__x).__cvt();
        where(__value_isnormal.__cvt(), __e) = __exponent_bits;
        static_assert(sizeof(_IV) == sizeof(__value_isnormal));
        const _IV __offset
          = (simd_bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
            | (simd_bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
                                  & static_simd_cast<_MaskType>(__x != 0))
               & _IV(__exp_adjust + __exp_offset));
        *__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
        return __mant;
      }
  }

// }}}
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)

// logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)

//}}}
// logb{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_logb(__data(__x))};
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
              __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
              __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__proposed;
        auto __is_normal = isnormal(__x);

        // work on abs(__x) to reflect the return value on Linux for negative
        // inputs (domain-error => implementation-defined value is returned)
        const _V abs_x = abs(__x);

        // __exponent(__x) returns the exponent value (bias removed) as
        // simd<_Up> with integral _Up
        auto&& __exponent = [](const _V& __v) {
          using namespace std::experimental::__proposed;
          using _IV = rebind_simd_t<
            conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
          return (simd_bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
                   - (__max_exponent_v<_Tp> - 1);
        };
        _V __r = static_simd_cast<_V>(__exponent(abs_x));
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
          // without corner cases (nan, inf, subnormal, zero) we have our
          // answer:
          return __r;
        const auto __is_zero = __x == 0;
        const auto __is_nan = isnan(__x);
        const auto __is_inf = isinf(__x);
        where(__is_zero, __r) = -__infinity_v<_Tp>;
        where(__is_nan, __r) = __x;
        where(__is_inf, __r) = __infinity_v<_Tp>;
        __is_normal |= __is_zero || __is_nan || __is_inf;
        if (all_of(__is_normal))
          // at this point everything but subnormals is handled
          return __r;
        // for subnormals, repeat the exponent extraction after multiplying
        // the input with a floating point value that has 112 (0x70) in its
        // exponent (not too big for sp and large enough for dp)
        const _V __scaled = abs_x * _Tp(0x1p112);
        _V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
        where(__is_normal, __scaled_exp) = __r;
        return __scaled_exp;
      }
  }

//}}}
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
        _Tp __tmp;
        _Tp __r = std::modf(__x[0], &__tmp);
        __iptr[0] = __tmp;
        return __r;
      }
    else
      {
        const auto __integral = trunc(__x);
        *__iptr = __integral;
        auto __r = __x - __integral;
#if !__FINITE_MATH_ONLY__
        where(isinf(__x), __r) = _Tp();
#endif
        return copysign(__r, __x);
      }
  }

_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)

// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

#define _GLIBCXX_SIMD_CVTING2(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
  { \
    return _NAME(__x, __y); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
  { \
    return _NAME(__x, __y); \
  }

#define _GLIBCXX_SIMD_CVTING3(_NAME) \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  } \
 \
template <typename _Tp, typename _Abi> \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME( \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, \
    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
  { \
    return _NAME(__x, __y, __z); \
  }

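// Applies __apply chunk-wise to the pieces of fixed_size simd arguments and
// reassembles the per-chunk results into an _R.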
template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
                     const _Tps&... __args)
  {
    return {__private_init,
            __data(__arg0)._M_apply_per_chunk(
              [&](auto __impl, const auto&... __inner) {
                using _V = typename decltype(__impl)::simd_type;
                return __data(__apply(_V(__private_init, __inner)...));
              },
              __data(__args)...)};
  }

template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
        return __fixed_size_apply<_V>([](auto __a,
                                         auto __b) { return hypot(__a, __b); },
                                      __x, __y);
      }
    else
      {
        // A simple solution for _Tp == float would be to cast to double and
        // simply calculate sqrt(x²+y²) as it can't over-/underflow anymore
        // with dp. It still needs the Annex F fixups though and isn't faster
        // on Skylake-AVX512 (not even for SSE and AVX vectors, and really
        // bad for AVX-512).
        using namespace __float_bitwise_operators;
        using namespace __proposed;
        _V __absx = abs(__x);          // no error
        _V __absy = abs(__y);          // no error
        _V __hi = max(__absx, __absy); // no error
        _V __lo = min(__absy, __absx); // no error

        // round __hi down to the next power-of-2:
        _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
        if constexpr (__have_neon && !__have_neon_a32)
          { // With ARMv7 NEON, we have no subnormals and must use a slightly
            // different strategy
            const _V __hi_exp = __hi & __inf;
            _V __scale_back = __hi_exp;
            // For large exponents (max & max/2) the inversion comes too
            // close to subnormals. Subtract 3 from the exponent:
            where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
            // Invert and adjust for the off-by-one error of inversion via
            // xor:
            const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
            const _V __h1 = __hi * __scale;
            const _V __l1 = __lo * __scale;
            _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
            // Fix up hypot(0, 0) to not be NaN:
            where(__hi == 0, __r) = 0;
            return __r;
          }
#endif

#ifdef __FAST_MATH__
        // With fast-math, ignore precision of subnormals and inputs from
        // __finite_max_v/2 to __finite_max_v. This removes all
        // branching/masking.
        if constexpr (true)
#else
        if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                      && all_of(isnormal(__y))))
#endif
          {
            const _V __hi_exp = __hi & __inf;
            // ((__hi + __hi) & __inf) ^ __inf almost works for computing
            // __scale, except when (__hi + __hi) & __inf == __inf, in which
            // case __scale becomes 0 (should be min/2 instead) and thus
            // loses the information from __lo.
#ifdef __FAST_MATH__
            using _Ip = __int_for_sizeof_t<_Tp>;
            using _IV = rebind_simd_t<_Ip, _V>;
            const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
            const _V __scale
              = simd_bit_cast<_V>(2 * simd_bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
            const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
              = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
            const _V __h1 = (__hi & __mant_mask) | _V(1);
            const _V __l1 = __lo * __scale;
            return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
          }
        else
          {
            // slower path to support subnormals
            // if __hi is subnormal, avoid scaling by inf & final mul by 0
            // (which yields NaN) by using min()
            _V __scale = _V(1 / __norm_min_v<_Tp>);
            // invert exponent w/o error and w/o using the slow divider unit:
            // xor inverts the exponent but off by 1. Multiplication with .5
            // adjusts for the discrepancy.
            where(__hi >= __norm_min_v<_Tp>, __scale)
              = ((__hi & __inf) ^ __inf) * _Tp(.5);
            // adjust final exponent for subnormal inputs
            _V __hi_exp = __norm_min_v<_Tp>;
            where(__hi >= __norm_min_v<_Tp>, __hi_exp)
              = __hi & __inf;          // no error
            _V __h1 = __hi * __scale;  // no error
            _V __l1 = __lo * __scale;  // no error

            // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
            // this ensures no overflow in the argument to sqrt
            _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            // the naive fixup goes like this:
            //
            // where(__l1 == 0, __r) = __hi;
            // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
            // where(isinf(__absx) || isinf(__absy), __r) = __inf;
            //
            // The fixup can be prepared in parallel with the sqrt, requiring
            // a single blend step after hi_exp * sqrt, reducing latency and
            // throughput:
            _V __fixup = __hi; // __lo == 0
            where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
            where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
            where(!(__lo == 0 || isunordered(__x, __y)
                      || (isinf(__absx) || isinf(__absy))),
                  __fixup)
              = __r;
            __r = __fixup;
#endif
            return __r;
          }
      }
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
                                 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
                                                                           __y);
  }

_GLIBCXX_SIMD_CVTING2(hypot)

template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y, _VV __z)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Abi = typename _V::abi_type;
    using _Tp = typename _V::value_type;
    /* FIXME: enable after PR77776 is resolved
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
    else
    */
    if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
      {
        return __fixed_size_apply<simd<_Tp, _Abi>>(
          [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
          __x, __y, __z);
      }
    else
      {
        using namespace __float_bitwise_operators;
        using namespace __proposed;
        const _V __absx = abs(__x);                 // no error
        const _V __absy = abs(__y);                 // no error
        const _V __absz = abs(__z);                 // no error
        _V __hi = max(max(__absx, __absy), __absz); // no error
        _V __l0 = min(__absz, max(__absx, __absy)); // no error
        _V __l1 = min(__absy, __absx);              // no error
        if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
                        && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
          { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal
            // or NaN. In this case the bit-tricks don't work, they require
            // IEC559 binary32 or binary64 format.
#ifdef __STDC_IEC_559__
            // fixup for Annex F requirements
            if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
              return __infinity_v<_Tp>;
            else if (isunordered(__absx[0], __absy[0] + __absz[0]))
              return __quiet_NaN_v<_Tp>;
            else if (__l0[0] == 0 && __l1[0] == 0)
              return __hi;
#endif
            _V __hi_exp = __hi;
            const _ULLong __tmp = 0x8000'0000'0000'0000ull;
            __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
            const _V __scale = 1 / __hi_exp;
            __hi *= __scale;
            __l0 *= __scale;
            __l1 *= __scale;
            return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
          }
        else
          {
            // round __hi down to the next power-of-2:
            _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
            if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
              { // With ARMv7 NEON, we have no subnormals and must use a
                // slightly different strategy
                const _V __hi_exp = __hi & __inf;
                _V __scale_back = __hi_exp;
                // For large exponents (max & max/2) the inversion comes too
                // close to subnormals. Subtract 3 from the exponent:
                where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
                // Invert and adjust for the off-by-one error of inversion
                // via xor:
                const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
                const _V __h1 = __hi * __scale;
                __l0 *= __scale;
                __l1 *= __scale;
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                asm("" : "+m"(__lo));
                _V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
                // Fix up hypot(0, 0, 0) to not be NaN:
                where(__hi == 0, __r) = 0;
                return __r;
              }
#endif

#ifdef __FAST_MATH__
            // With fast-math, ignore precision of subnormals and inputs
            // from __finite_max_v/2 to __finite_max_v. This removes all
            // branching/masking.
            if constexpr (true)
#else
            if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
                                          && all_of(isnormal(__y))
                                          && all_of(isnormal(__z))))
#endif
              {
                const _V __hi_exp = __hi & __inf;
                // ((__hi + __hi) & __inf) ^ __inf almost works for computing
                // __scale, except when (__hi + __hi) & __inf == __inf, in
                // which case __scale becomes 0 (should be min/2 instead) and
                // thus loses the information from __lo.
#ifdef __FAST_MATH__
                using _Ip = __int_for_sizeof_t<_Tp>;
                using _IV = rebind_simd_t<_Ip, _V>;
                const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
                const _V __scale
                  = simd_bit_cast<_V>(2 * simd_bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
                const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
                constexpr _Tp __mant_mask
                  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
                const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
                __l0 *= __scale;
                __l1 *= __scale;
                const _V __lo
                  = __l0 * __l0
                      + __l1 * __l1; // add the two smaller values first
                return __hi_exp * sqrt(__lo + __h1 * __h1);
              }
            else
              {
                // slower path to support subnormals
                // if __hi is subnormal, avoid scaling by inf & final mul
                // by 0 (which yields NaN) by using min()
                _V __scale = _V(1 / __norm_min_v<_Tp>);
                // invert exponent w/o error and w/o using the slow divider
                // unit: xor inverts the exponent but off by 1.
                // Multiplication with .5 adjusts for the discrepancy.
                where(__hi >= __norm_min_v<_Tp>, __scale)
                  = ((__hi & __inf) ^ __inf) * _Tp(.5);
                // adjust final exponent for subnormal inputs
                _V __hi_exp = __norm_min_v<_Tp>;
                where(__hi >= __norm_min_v<_Tp>, __hi_exp)
                  = __hi & __inf;         // no error
                _V __h1 = __hi * __scale; // no error
                __l0 *= __scale;          // no error
                __l1 *= __scale;          // no error
                _V __lo = __l0 * __l0
                            + __l1 * __l1; // add the two smaller values first
                _V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
                // fixup for Annex F requirements
                _V __fixup = __hi; // __lo == 0
                // where(__lo == 0, __fixup) = __hi;
                where(isunordered(__x, __y + __z), __fixup)
                  = __quiet_NaN_v<_Tp>;
                where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
                  = __inf;
                // Instead of __lo == 0, the following could depend on
                // __h1² == __h1² + __lo (i.e. __hi is so much larger than
                // the other two inputs that the result is exactly __hi).
                // While this may improve precision, it is likely to reduce
                // efficiency if the ISA has FMAs (because __h1² + __lo is an
                // FMA, but the intermediate __h1² must be kept).
                where(!(__lo == 0 || isunordered(__x, __y + __z)
                          || isinf(__absx) || isinf(__absy) || isinf(__absz)),
                      __fixup)
                  = __r;
                __r = __fixup;
#endif
                return __r;
              }
          }
      }
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
        const simd<_Tp, _Abi>& __z)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
                                 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
                                                                           __y,
                                                                           __z);
  }

_GLIBCXX_SIMD_CVTING3(hypot)

_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      return std::copysign(__x[0], __y[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_copysign(__data(__x), __data(__y))};
    else
      {
        using _V = simd<_Tp, _Abi>;
        using namespace std::experimental::__float_bitwise_operators;
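        // XOR of +1 and -1 leaves exactly the sign bit set (all other bits
        // of the two bit patterns are equal):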
        _GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
        return (__x & ~__signmask) | (__y & __signmask);
      }
  }

_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)

// isnan and isinf require special treatment because old glibc may declare
// `int isinf(double)`.
template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }

template <typename _Tp, typename _Abi, typename...,
          typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }

_GLIBCXX_SIMD_MATH_CALL_(isnormal)

template <typename..., typename _Tp, typename _Abi>
  simd_mask<_Tp, _Abi>
  signbit(simd<_Tp, _Abi> __x)
  {
    if constexpr (is_integral_v<_Tp>)
      {
        if constexpr (is_unsigned_v<_Tp>)
          return simd_mask<_Tp, _Abi>{}; // false
        else
          return __x < 0;
      }
    else
      return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
  }

_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)

/* not covered in [parallel.simd.math]
template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);

template <typename _V> struct simd_div_t {
  _V quot, rem;
};

template <typename _Abi>
simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer,
                              _SCharv<_Abi> denom);
template <typename _Abi>
simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer,
                               __shortv<_Abi> denom);
template <typename _Abi>
simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
template <typename _Abi>
simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer,
                              __longv<_Abi> denom);
template <typename _Abi>
simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
                               __llongv<_Abi> denom);
*/

// special math {{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
      return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
    });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
                 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
      return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
    });
  }

_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
          const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
  }

_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
             const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
               const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
               const simd<_Tp, _Abi>& __theta)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
      return std::sph_legendre(__l[__i], __m[__i], __theta[__i]);
    });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
              const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
      [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
  }
// }}}

#undef _GLIBCXX_SIMD_CVTING2
#undef _GLIBCXX_SIMD_CVTING3
#undef _GLIBCXX_SIMD_MATH_CALL_
#undef _GLIBCXX_SIMD_MATH_CALL2_
#undef _GLIBCXX_SIMD_MATH_CALL3_

_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

// vim: foldmethod=marker sw=2 ts=8 noet sts=2