Vector variants of the new C23 powr routines.
These provide the same maximum error as pow by virtue of
relying on shared approximation techniques and sources.
Note: Benchmark inputs for powr(f) are identical to pow(f).
Performance gain over pow on V1 with GCC@15:
- SVE powr: 10-12% on subnormal x, 12-13% on x < 0.
- SVE powrf: 15% on all x < 0.
- AdvSIMD powr: for x < 0, 40% if x subnormal, 60% otherwise.
- AdvSIMD powrf: 4% on x subnormals or x < 0.
#define __DECL_SIMD_powf64x
#define __DECL_SIMD_powf128x
+#define __DECL_SIMD_powr
+#define __DECL_SIMD_powrf
+#define __DECL_SIMD_powrl
+#define __DECL_SIMD_powrf16
+#define __DECL_SIMD_powrf32
+#define __DECL_SIMD_powrf64
+#define __DECL_SIMD_powrf128
+#define __DECL_SIMD_powrf32x
+#define __DECL_SIMD_powrf64x
+#define __DECL_SIMD_powrf128x
+
#define __DECL_SIMD_acos
#define __DECL_SIMD_acosf
#define __DECL_SIMD_acosl
__MATHCALL (pown,, (_Mdouble_ __x, long long int __y));
/* Return X to the Y power. */
+__MATHCALL_VEC (powr,, (_Mdouble_ __x, _Mdouble_ __y));
__MATHCALL (powr,, (_Mdouble_ __x, _Mdouble_ __y));
/* Return the Yth root of X. */
log2 \
log2p1 \
pow \
+ powr \
rsqrt \
sin \
sinh \
_ZGVsMxv_rsqrt;
_ZGVsMxv_rsqrtf;
}
+ GLIBC_2.44 {
+ _ZGVnN2vv_powr;
+ _ZGVnN2vv_powrf;
+ _ZGVnN4vv_powrf;
+ _ZGVsMxvv_powr;
+ _ZGVsMxvv_powrf;
+ }
}
libmvec_hidden_proto (V_NAME_F1(logp1));
libmvec_hidden_proto (V_NAME_F1(log));
libmvec_hidden_proto (V_NAME_F2(pow));
+libmvec_hidden_proto (V_NAME_F2(powr));
libmvec_hidden_proto (V_NAME_F1(rsqrt));
libmvec_hidden_proto (V_NAME_F1(sin));
libmvec_hidden_proto (V_NAME_F1(sinh));
# define __DECL_SIMD_pow __DECL_SIMD_aarch64
# undef __DECL_SIMD_powf
# define __DECL_SIMD_powf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_powr
+# define __DECL_SIMD_powr __DECL_SIMD_aarch64
+# undef __DECL_SIMD_powrf
+# define __DECL_SIMD_powrf __DECL_SIMD_aarch64
# undef __DECL_SIMD_rsqrt
# define __DECL_SIMD_rsqrt __DECL_SIMD_aarch64
# undef __DECL_SIMD_rsqrtf
__vpcs __f32x4_t _ZGVnN4v_log2p1f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_logp1f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
+__vpcs __f32x4_t _ZGVnN4vv_powrf (__f32x4_t, __f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_rsqrtf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_log2p1 (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_logp1 (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
+__vpcs __f64x2_t _ZGVnN2vv_powr (__f64x2_t, __f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_rsqrt (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
__sv_f32_t _ZGVsMxv_log2p1f (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_logp1f (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxvv_powf (__sv_f32_t, __sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxvv_powrf (__sv_f32_t, __sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_rsqrtf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_log2p1 (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_logp1 (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxvv_pow (__sv_f64_t, __sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxvv_powr (__sv_f64_t, __sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_rsqrt (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t);
!GCC$ builtin (logp1f) attributes simd (notinbranch) if('fastmath')
!GCC$ builtin (pow) attributes simd (notinbranch) if('fastmath')
!GCC$ builtin (powf) attributes simd (notinbranch) if('fastmath')
+!GCC$ builtin (powr) attributes simd (notinbranch) if('fastmath')
+!GCC$ builtin (powrf) attributes simd (notinbranch) if('fastmath')
!GCC$ builtin (rsqrt) attributes simd (notinbranch) if('fastmath')
!GCC$ builtin (rsqrtf) attributes simd (notinbranch) if('fastmath')
!GCC$ builtin (sin) attributes simd (notinbranch) if('fastmath')
--- /dev/null
+/* Double-precision vector (AdvSIMD) powr function
+
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "pow_common.h"
+#include "v_math.h"
+
+#include "v_pow_inline.h"
+
+/* Scalar route for lanes the vector path cannot handle: negative x, the
+   zero/inf/NaN special cases of x and y, tiny or huge |y| and subnormal x,
+   falling through to the shared exp (y * log (x)) core otherwise.
+   Note 2 * ix shifts the sign bit out of the 64-bit value, so the doubled
+   comparisons below effectively test |x| and |y|.  */
+static double NOINLINE
+powr_scalar_special_case (double x, double y)
+{
+  /* Negative x returns NaN (+0/-0 and NaN x not handled here). */
+  if (x < 0)
+    return __builtin_nan ("");
+
+  uint64_t ix = asuint64 (x);
+  uint64_t iy = asuint64 (y);
+  uint32_t topx = top12 (x);
+  uint32_t topy = top12 (y);
+
+  /* Special cases: (x < 0x1p-126 or inf or nan) or
+     (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
+  if (__glibc_unlikely (topx - SmallPowX >= ThresPowX
+			|| (topy & 0x7ff) - SmallPowY >= ThresPowY))
+    {
+      /* |y| is 0, Inf or NaN. */
+      if (__glibc_unlikely (zeroinfnan (iy)))
+	{
+	  /* |x| or |y| is NaN.  */
+	  if (2 * ix > 2 * asuint64 (INFINITY)
+	      || 2 * iy > 2 * asuint64 (INFINITY))
+	    return __builtin_nan ("");
+	  if (2 * iy == 0)
+	    {
+	      /* |x| = 0 or inf: powr (0, 0) and powr (inf, 0) are NaN. */
+	      if ((2 * ix == 0) || (2 * ix == 2 * asuint64 (INFINITY)))
+		return __builtin_nan ("");
+	      /* x is finite. */
+	      return 1.0;
+	    }
+	  /* |y| = Inf and x = 1.0. */
+	  if (ix == asuint64 (1.0))
+	    return __builtin_nan ("");
+	  /* |x| < 1 and y = Inf or |x| > 1 and y = -Inf. */
+	  if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
+	    return 0.0;
+	  /* |y| = Inf and previous conditions not met. */
+	  return y * y;
+	}
+      /* |x| is 0, Inf or NaN. */
+      if (__glibc_unlikely (zeroinfnan (ix)))
+	{
+	  double x2 = x * x;
+	  return iy >> 63 ? 1 / x2 : x2;
+	}
+      /* Here x and y are non-zero finite. */
+      /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then powr(x,y) = inf/0
+	 and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then powr(x,y) = +-1. */
+      if ((topy & 0x7ff) - SmallPowY >= ThresPowY)
+	{
+	  if (ix == asuint64 (1.0))
+	    return 1.0;
+	  /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
+	  if ((topy & 0x7ff) < SmallPowY)
+	    return 1.0;
+	  /* Huge |y|: overflow to inf when x > 1 and y > 0 or x < 1 and
+	     y < 0, underflow to 0 otherwise. */
+	  return (ix > asuint64 (1.0)) == (topy < 0x800) ? INFINITY : 0;
+	}
+      if (topx == 0)
+	{
+	  /* Normalize subnormal x so exponent becomes negative. */
+	  ix = asuint64 (x * 0x1p52);
+	  ix -= 52ULL << 52;
+	}
+    }
+
+  /* Core computation of exp (y * log (x)) in extended precision:
+     hi + lo approximates log (x), ehi + elo approximates y * log (x). */
+  double lo;
+  double hi = log_inline (ix, &lo);
+  double ehi = y * hi;
+  double elo = y * lo + fma (y, hi, -ehi);
+  return exp_inline (ehi, elo, 0);
+}
+
+/* Resolve both lanes with the scalar special-case routine.  */
+static float64x2_t VPCS_ATTR NOINLINE
+scalar_fallback (float64x2_t x, float64x2_t y)
+{
+  double lane0 = powr_scalar_special_case (x[0], y[0]);
+  double lane1 = powr_scalar_special_case (x[1], y[1]);
+  return (float64x2_t){ lane0, lane1 };
+}
+
+/* Implementation of AdvSIMD powr.
+   Maximum measured error is 1.04 ULPs:
+   _ZGVnN2vv_powr(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)
+   got 0x1.f71162f473251p-1
+   want 0x1.f71162f473252p-1. */
+float64x2_t VPCS_ATTR V_NAME_D2 (powr) (float64x2_t x, float64x2_t y)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* Case of x <= 0 is too complicated to be vectorised efficiently here,
+     fallback to scalar powr for all lanes if any lane has x <= 0.  The
+     signed-integer compare on the bit pattern is true for +0.0 (all-zero
+     bits) and for any value with the sign bit set (including -0.0 and
+     negative NaNs); all such lanes are resolved in the scalar path. */
+  if (v_any_u64 (vclezq_s64 (vreinterpretq_s64_f64 (x))))
+    return scalar_fallback (x, y);
+
+  uint64x2_t vix = vreinterpretq_u64_f64 (x);
+  uint64x2_t viy = vreinterpretq_u64_f64 (y);
+
+  /* Special cases of x or y.
+     The case y==0 does not trigger a special case, since in this case it is
+     necessary to fix the result only if x is a signalling nan, which already
+     triggers a special case. We test y==0 directly in the scalar fallback. */
+  uint64x2_t x_is_inf_or_nan = vcgeq_u64 (vandq_u64 (vix, d->inf), d->inf);
+  uint64x2_t y_is_inf_or_nan = vcgeq_u64 (vandq_u64 (viy, d->inf), d->inf);
+  uint64x2_t special = vorrq_u64 (x_is_inf_or_nan, y_is_inf_or_nan);
+
+  /* Fallback to scalar on all lanes if any lane is inf or nan. */
+  if (__glibc_unlikely (v_any_u64 (special)))
+    return scalar_fallback (x, y);
+
+  /* Cases of subnormal x: |x| < 0x1p-1022. */
+  uint64x2_t x_is_subnormal = vcaltq_f64 (x, d->subnormal_bound);
+  if (__glibc_unlikely (v_any_u64 (x_is_subnormal)))
+    {
+      /* Normalize subnormal x so exponent becomes negative. */
+      uint64x2_t vix_norm
+	  = vreinterpretq_u64_f64 (vmulq_f64 (x, d->subnormal_scale));
+      vix_norm = vsubq_u64 (vix_norm, d->subnormal_bias);
+      x = vbslq_f64 (x_is_subnormal, vreinterpretq_f64_u64 (vix_norm), x);
+    }
+
+  /* Core computation of exp (y * log (x)). */
+  return v_pow_inline (x, y, d);
+}
--- /dev/null
+/* Double-precision vector (SVE) powr function
+
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "math_config.h"
+#include "pow_common.h"
+#include "sv_math.h"
+
+#define WANT_SV_POW_SIGN_BIAS 0
+#include "sv_pow_inline.h"
+
+/* A scalar subroutine used to fix main powr special cases.  Note 2 * ix
+   shifts the sign bit out of the 64-bit value, so the doubled comparisons
+   below effectively test |x| and |y|.  */
+static inline double
+powr_specialcase (double x, double y)
+{
+  uint64_t ix = asuint64 (x);
+  uint64_t iy = asuint64 (y);
+  /* |y| is 0, Inf or NaN. */
+  if (__glibc_unlikely (zeroinfnan (iy)))
+    {
+      /* |x| or |y| is NaN. */
+      if (2 * ix > 2 * asuint64 (INFINITY) || 2 * iy > 2 * asuint64 (INFINITY))
+	return __builtin_nan ("");
+      /* |y| is 0.0. */
+      if (2 * iy == 0)
+	{
+	  /* |x| = 0 or Inf: powr (0, 0) and powr (Inf, 0) are NaN. */
+	  if ((2 * ix == 0) || (2 * ix == 2 * asuint64 (INFINITY)))
+	    return __builtin_nan ("");
+	  /* x is finite. */
+	  return 1.0;
+	}
+      /* x is 1.0 and |y| = Inf. */
+      if (ix == asuint64 (1.0))
+	return __builtin_nan ("");
+      /* |x| < 1 and y = Inf or |x| > 1 and y = -Inf. */
+      if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
+	return 0.0;
+      /* |y| = Inf and previous conditions not met. */
+      return y * y;
+    }
+  /* x is 0, Inf or NaN. Negative x are handled in the core. */
+  if (__glibc_unlikely (zeroinfnan (ix)))
+    {
+      double x2 = x * x;
+      return (iy >> 63) ? 1 / x2 : x2;
+    }
+  /* Return x for convenience, but make sure result is never used. */
+  return x;
+}
+
+/* Apply the scalar powr special-case fix-up to the lanes selected by the
+   predicate, keeping the vector result on all other lanes.  */
+static svfloat64_t NOINLINE
+sv_powr_specialcase (svfloat64_t x, svfloat64_t y, svfloat64_t z,
+		     svbool_t special)
+{
+  return sv_call2_f64 (powr_specialcase, x, y, z, special);
+}
+
+/* Implementation of SVE powr.
+
+   Provides the same accuracy as AdvSIMD pow and powr, since it relies on the
+   same algorithm.
+
+   Maximum measured error is 1.04 ULPs:
+   SV_NAME_D2 (powr) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
+   got 0x1.f7116284221fcp-1
+   want 0x1.f7116284221fdp-1. */
+svfloat64_t SV_NAME_D2 (powr) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svuint64_t vix = svreinterpret_u64 (x);
+  svuint64_t viy = svreinterpret_u64 (y);
+
+  /* Predicate of lanes with non-negative x.  It governs all the predicated
+     operations below, so lanes with x < 0 (and NaN-x lanes, for which the
+     compare is false) stay inactive and are overwritten with NaN later. */
+  svbool_t xpos = svcmpge (pg, x, sv_f64 (0.0));
+
+  /* Special cases of x or y: zero, inf and nan. */
+  svbool_t xspecial = sv_zeroinfnan (xpos, vix);
+  svbool_t yspecial = sv_zeroinfnan (xpos, viy);
+  svbool_t cmp = svorr_z (xpos, xspecial, yspecial);
+
+  /* Cases of positive subnormal x: 0 < x < 0x1p-1022. */
+  svbool_t x_is_subnormal = svaclt (xpos, x, 0x1p-1022);
+  if (__glibc_unlikely (svptest_any (xpos, x_is_subnormal)))
+    {
+      /* Normalize subnormal x so exponent becomes negative. */
+      svuint64_t vix_norm
+	  = svreinterpret_u64 (svmul_m (x_is_subnormal, x, 0x1p52));
+      vix = svsub_m (x_is_subnormal, vix_norm, 52ULL << 52);
+    }
+
+  /* log (x) in extended precision: vhi + vlo. */
+  svfloat64_t vlo;
+  svfloat64_t vhi = sv_log_inline (xpos, vix, &vlo, d);
+
+  /* y * log (x) in extended precision: vehi is the rounded product,
+     vemi = vehi - y*vhi recovers its rounding error via a fused multiply,
+     and velo = y*vlo - vemi folds in the low part of the log. */
+  svfloat64_t vehi = svmul_x (svptrue_b64 (), y, vhi);
+  svfloat64_t vemi = svmls_x (xpos, vehi, y, vhi);
+  svfloat64_t velo = svnmls_x (xpos, vemi, y, vlo);
+  svfloat64_t vz = sv_exp_inline (xpos, vehi, velo, sv_u64 (0), d);
+
+  /* Cases of negative x. */
+  vz = svsel (xpos, vz, sv_f64 (__builtin_nan ("")));
+
+  if (__glibc_unlikely (svptest_any (cmp, cmp)))
+    return sv_powr_specialcase (x, y, vz, cmp);
+
+  return vz;
+}
--- /dev/null
+/* Single-precision vector (AdvSIMD) powr function
+
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "flt-32/math_config.h"
+#include "v_math.h"
+#include "v_powrf_inline.h"
+
+/* A scalar subroutine used to fix main powrf special cases.  Note 2 * ix
+   shifts the sign bit out of the 32-bit value, so the doubled comparisons
+   below effectively test |x| and |y|.  */
+static inline float
+powrf_specialcase (float x, float y)
+{
+  /* Negative x returns NaN (+0/-0 and NaN x not handled here). */
+  if (x < 0)
+    return __builtin_nanf ("");
+
+  uint32_t ix = asuint (x);
+  uint32_t iy = asuint (y);
+  /* y is 0, Inf or NaN. */
+  if (__glibc_unlikely (zeroinfnan (iy)))
+    {
+      /* |x| or |y| is NaN. */
+      if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
+	return __builtin_nanf ("");
+      /* |y| = 0. */
+      if (2 * iy == 0)
+	{
+	  /* |x| = 0 or inf: powr (0, 0) and powr (inf, 0) are NaN. */
+	  if ((2 * ix == 0) || (2 * ix == 2u * 0x7f800000))
+	    return __builtin_nanf ("");
+	  /* x is finite. */
+	  return 1.0f;
+	}
+      /* |y| = Inf and x = 1.0. */
+      if (ix == 0x3f800000)
+	return __builtin_nanf ("");
+      /* |x| < 1 and y = Inf or |x| > 1 and y = -Inf. */
+      if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
+	return 0.0f;
+      /* |y| = Inf and previous conditions not met. */
+      return y * y;
+    }
+  /* x is 0, Inf or NaN. Negative x are handled in the core. */
+  if (__glibc_unlikely (zeroinfnan (ix)))
+    {
+      float x2 = x * x;
+      return iy & 0x80000000 ? 1 / x2 : x2;
+    }
+
+  /* Return x for convenience, but make sure result is never used. */
+  return x;
+}
+
+/* Apply the scalar powrf fix-up to the lanes flagged in the mask, keeping
+   the precomputed vector result everywhere else.  */
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, float32x4_t z, uint32x4_t special)
+{
+  return v_call2_f32 (powrf_specialcase, x, y, z, special);
+}
+
+/* Power implementation for x containing negative or subnormal lanes. */
+static inline float32x4_t
+v_powrf_x_is_neg_or_sub (float32x4_t x, float32x4_t y, const struct data *d)
+{
+  /* |x| < 0x1p-126 (vcalt compares absolute values), so this mask covers
+     subnormals and zeros of either sign, but not negative normal x. */
+  uint32x4_t xsmall = vcaltq_f32 (x, v_f32 (0x1p-126f));
+
+  /* Normalize subnormals. */
+  float32x4_t a = vabsq_f32 (x);
+  uint32x4_t ia_norm = vreinterpretq_u32_f32 (vmulq_f32 (a, d->norm));
+  ia_norm = vsubq_u32 (ia_norm, d->subnormal_bias);
+  a = vbslq_f32 (xsmall, vreinterpretq_f32_u32 (ia_norm), a);
+
+  /* Evaluate exp (y * log(x)) using |x| and sign bias correction. */
+  float32x4_t ret = v_powrf_core (a, y, d);
+
+  /* Cases of finite y and finite negative x: powr returns NaN. */
+  uint32x4_t xisneg = vcltzq_f32 (x);
+  return vbslq_f32 (xisneg, d->nan, ret);
+}
+
+/* Implementation of AdvSIMD powrf.
+
+   powr(x,y) := exp(y * log (x))
+
+   This means powr(x,y) core computation matches that of pow(x,y)
+   but powr returns NaN for negative x even if y is an integer.
+
+   Maximum measured error is 2.57 ULPs:
+   V_NAME_F2 (powr) (0x1.031706p+0, 0x1.ce2ec2p+12)
+   got 0x1.fff868p+127
+   want 0x1.fff862p+127. */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (powr) (float32x4_t x, float32x4_t y)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* Special cases of x or y: zero, inf and nan. */
+  uint32x4_t ix = vreinterpretq_u32_f32 (x);
+  uint32x4_t iy = vreinterpretq_u32_f32 (y);
+  uint32x4_t xspecial = v_zeroinfnan (d, ix);
+  uint32x4_t yspecial = v_zeroinfnan (d, iy);
+  uint32x4_t cmp = vorrq_u32 (xspecial, yspecial);
+
+  /* Evaluate powr(x, y) for x containing negative or subnormal lanes. */
+  uint32x4_t x_is_neg_or_sub = vcltq_f32 (x, v_f32 (0x1p-126f));
+  if (__glibc_unlikely (v_any_u32 (x_is_neg_or_sub)))
+    {
+      float32x4_t ret = v_powrf_x_is_neg_or_sub (x, y, d);
+      if (__glibc_unlikely (v_any_u32 (cmp)))
+	return special_case (x, y, ret, cmp);
+      return ret;
+    }
+
+  /* Else evaluate powr(x, y) for normal and positive x only. */
+  if (__glibc_unlikely (v_any_u32 (cmp)))
+    return special_case (x, y, v_powrf_core (x, y, d), cmp);
+  return v_powrf_core (x, y, d);
+}
+
+libmvec_hidden_def (V_NAME_F2 (powr))
+HALF_WIDTH_ALIAS_F2 (powr)
--- /dev/null
+/* Single-precision vector (SVE) powr function
+
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "flt-32/math_config.h"
+#include "sv_math.h"
+
+#define WANT_SV_POWF_SIGN_BIAS 0
+#include "sv_powf_inline.h"
+
+/* A scalar subroutine used to fix main powrf special cases.  Note 2 * ix
+   shifts the sign bit out of the 32-bit value, so the doubled comparisons
+   below effectively test |x| and |y|.  */
+static inline float
+powrf_specialcase (float x, float y)
+{
+  uint32_t ix = asuint (x);
+  uint32_t iy = asuint (y);
+  /* |y| is 0, Inf or NaN. */
+  if (__glibc_unlikely (zeroinfnan (iy)))
+    {
+      /* |x| or |y| is NaN. */
+      if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
+	return __builtin_nanf ("");
+      /* |y| = 0. */
+      if (2 * iy == 0)
+	{
+	  /* |x| = 0 or Inf: powr (0, 0) and powr (Inf, 0) are NaN. */
+	  if ((2 * ix == 0) || (2 * ix == 2u * 0x7f800000))
+	    return __builtin_nanf ("");
+	  /* x is finite. */
+	  return 1.0f;
+	}
+      /* |y| = Inf and x = 1.0. */
+      if (ix == 0x3f800000)
+	return __builtin_nanf ("");
+      /* |x| < 1 and y = Inf or |x| > 1 and y = -Inf. */
+      if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
+	return 0.0f;
+      /* |y| = Inf and previous conditions not met. */
+      return y * y;
+    }
+  /* x is 0, Inf or NaN. Negative x are handled in the core. */
+  if (__glibc_unlikely (zeroinfnan (ix)))
+    {
+      float x2 = x * x;
+      return iy & 0x80000000 ? 1 / x2 : x2;
+    }
+  /* Return x for convenience, but make sure result is never used. */
+  return x;
+}
+
+/* Route the lanes selected by the predicate through the scalar powrf
+   special-case routine, keeping the vector result on the other lanes.  */
+static svfloat32_t NOINLINE
+sv_call_powrf_sc (svfloat32_t x, svfloat32_t y, svfloat32_t z, svbool_t p)
+{
+  return sv_call2_f32 (powrf_specialcase, x, y, z, p);
+}
+
+/* Implementation of SVE powrf.
+
+   Provides the same accuracy as AdvSIMD powf and powrf, since it relies on the
+   same algorithm.
+
+   Maximum measured error is 2.57 ULPs:
+   SV_NAME_F2 (powr) (0x1.031706p+0, 0x1.ce2ec2p+12)
+   got 0x1.fff868p+127
+   want 0x1.fff862p+127. */
+svfloat32_t SV_NAME_F2 (powr) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  svuint32_t vix = svreinterpret_u32 (x);
+  svuint32_t viy = svreinterpret_u32 (y);
+
+  /* Predicate of lanes with non-negative x.  It governs all the predicated
+     operations below, so lanes with x < 0 (and NaN-x lanes, for which the
+     compare is false) stay inactive and are overwritten with NaN later. */
+  svbool_t xpos = svcmpge (pg, x, sv_f32 (0.0f));
+
+  /* Special cases of x or y: zero, inf and nan. */
+  svbool_t xspecial = sv_zeroinfnan (xpos, vix);
+  svbool_t yspecial = sv_zeroinfnan (xpos, viy);
+  svbool_t cmp = svorr_z (xpos, xspecial, yspecial);
+
+  /* Cases of subnormal x: |x| < 0x1p-126. */
+  svbool_t x_is_subnormal = svaclt (xpos, x, d->small_bound);
+  if (__glibc_unlikely (svptest_any (xpos, x_is_subnormal)))
+    {
+      /* Normalize subnormal x so exponent becomes negative. */
+      vix = svreinterpret_u32 (svmul_m (x_is_subnormal, x, 0x1p23f));
+      vix = svsub_m (x_is_subnormal, vix, d->subnormal_bias);
+    }
+
+  /* Part of core computation carried in working precision: split x into
+     log2 table index i, exp2 scale k and residual mantissa bits iz. */
+  svuint32_t tmp = svsub_x (xpos, vix, d->off);
+  svuint32_t i
+      = svand_x (xpos, svlsr_x (xpos, tmp, (23 - V_POWF_LOG2_TABLE_BITS)),
+		 V_POWF_LOG2_N - 1);
+  svuint32_t top = svand_x (xpos, tmp, 0xff800000);
+  svuint32_t iz = svsub_x (xpos, vix, top);
+  svint32_t k
+      = svasr_x (xpos, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS));
+
+  /* Compute core in extended precision and return intermediate ylogx results
+     to handle cases of overflow and underflow in exp. */
+  svfloat32_t ylogx;
+  /* Pass a dummy sign_bias so we can re-use powf core.
+     The core is simplified by setting WANT_SV_POWF_SIGN_BIAS = 0. */
+  svfloat32_t ret = sv_powf_core (xpos, i, iz, k, y, sv_u32 (0), &ylogx, d);
+
+  /* Handle exp special cases of underflow and overflow.  ret_flow is
+     INFINITY on non-underflowing lanes and 0 elsewhere; it is selected
+     only where overflow or underflow occurred. */
+  svbool_t no_uflow = svcmpgt (xpos, ylogx, d->uflow_bound);
+  svbool_t oflow = svcmpgt (xpos, ylogx, d->oflow_bound);
+  svfloat32_t ret_flow = svdup_n_f32_z (no_uflow, INFINITY);
+  ret = svsel (svorn_z (xpos, oflow, no_uflow), ret_flow, ret);
+
+  /* Cases of negative x. */
+  ret = svsel (xpos, ret, sv_f32 (__builtin_nanf ("")));
+
+  if (__glibc_unlikely (svptest_any (cmp, cmp)))
+    return sv_call_powrf_sc (x, y, ret, cmp);
+
+  return ret;
+}
VPCS_VECTOR_WRAPPER (log2_advsimd, _ZGVnN2v_log2)
VPCS_VECTOR_WRAPPER (log2p1_advsimd, _ZGVnN2v_log2p1)
VPCS_VECTOR_WRAPPER_ff (pow_advsimd, _ZGVnN2vv_pow)
+VPCS_VECTOR_WRAPPER_ff (powr_advsimd, _ZGVnN2vv_powr)
VPCS_VECTOR_WRAPPER (rsqrt_advsimd, _ZGVnN2v_rsqrt)
VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin)
VPCS_VECTOR_WRAPPER (sinh_advsimd, _ZGVnN2v_sinh)
SVE_VECTOR_WRAPPER (log2_sve, _ZGVsMxv_log2)
SVE_VECTOR_WRAPPER (log2p1_sve, _ZGVsMxv_log2p1)
SVE_VECTOR_WRAPPER_ff (pow_sve, _ZGVsMxvv_pow)
+SVE_VECTOR_WRAPPER_ff (powr_sve, _ZGVsMxvv_powr)
SVE_VECTOR_WRAPPER (rsqrt_sve, _ZGVsMxv_rsqrt)
SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin)
SVE_VECTOR_WRAPPER (sinh_sve, _ZGVsMxv_sinh)
VPCS_VECTOR_WRAPPER (log2f_advsimd, _ZGVnN4v_log2f)
VPCS_VECTOR_WRAPPER (log2p1f_advsimd, _ZGVnN4v_log2p1f)
VPCS_VECTOR_WRAPPER_ff (powf_advsimd, _ZGVnN4vv_powf)
+VPCS_VECTOR_WRAPPER_ff (powrf_advsimd, _ZGVnN4vv_powrf)
VPCS_VECTOR_WRAPPER (rsqrtf_advsimd, _ZGVnN4v_rsqrtf)
VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf)
VPCS_VECTOR_WRAPPER (sinhf_advsimd, _ZGVnN4v_sinhf)
SVE_VECTOR_WRAPPER (log2f_sve, _ZGVsMxv_log2f)
SVE_VECTOR_WRAPPER (log2p1f_sve, _ZGVsMxv_log2p1f)
SVE_VECTOR_WRAPPER_ff (powf_sve, _ZGVsMxvv_powf)
+SVE_VECTOR_WRAPPER_ff (powrf_sve, _ZGVsMxvv_powrf)
SVE_VECTOR_WRAPPER (rsqrtf_sve, _ZGVsMxv_rsqrtf)
SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf)
SVE_VECTOR_WRAPPER (sinhf_sve, _ZGVsMxv_sinhf)
/* Helper for AdvSIMD single-precision powr
- Copyright (C) 2025 Free Software Foundation, Inc.
+ Copyright (C) 2025-2026 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include "powf_common.h"
+
#define Log2IdxMask (V_POWF_LOG2_N - 1)
#define Exp2IdxMask (V_POWF_EXP2_N - 1)
#define Scale ((double) V_POWF_EXP2_N)
GLIBC_2.43 _ZGVsMxv_log2p1f F
GLIBC_2.43 _ZGVsMxv_rsqrt F
GLIBC_2.43 _ZGVsMxv_rsqrtf F
+GLIBC_2.44 _ZGVnN2vv_powr F
+GLIBC_2.44 _ZGVnN2vv_powrf F
+GLIBC_2.44 _ZGVnN4vv_powrf F
+GLIBC_2.44 _ZGVsMxvv_powr F
+GLIBC_2.44 _ZGVsMxvv_powrf F