aarch64: Optimise AdvSIMD atanhf

author James Chesterman <James.Chesterman@arm.com>

Fri, 28 Nov 2025 11:18:53 +0000 (11:18 +0000)

committer Adhemerval Zanella <adhemerval.zanella@linaro.org>

Thu, 4 Dec 2025 13:54:49 +0000 (10:54 -0300)
author James Chesterman <James.Chesterman@arm.com>
Fri, 28 Nov 2025 11:18:53 +0000 (11:18 +0000)
committer Adhemerval Zanella <adhemerval.zanella@linaro.org>
Thu, 4 Dec 2025 13:54:49 +0000 (10:54 -0300)
diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c

index 93c41fbdc6f5dc530b6005c08befc7a691a596ab..8d2c956e13fa266a30c60b9241d4787516d6d780 100644 (file)
--- a/sysdeps/aarch64/fpu/atanhf_advsimd.c
+++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c
@@ -23,46 +23,68 @@
  const static struct data
  {
    struct v_log1pf_data log1pf_consts;
+  uint32x4_t abs_mask;
+  float32x4_t half;
    uint32x4_t one;
+  float32x4_t pinf, minf, nan;
  } data = {
    .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
+  .abs_mask = V4 (0x7fffffff),
+  .half = V4 (0.5f),
    .one = V4 (0x3f800000),
+  .pinf = V4 (INFINITY),
+  .minf = V4 (-INFINITY),
+  .nan = V4 (NAN),
  };
  
-#define AbsMask v_u32 (0x7fffffff)
-#define Half v_u32 (0x3f000000)
-
-static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t halfsign, float32x4_t y,
-             uint32x4_t special)
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, float32x4_t halfsign,
+             uint32x4_t special, const struct data *d)
  {
-  return v_call_f32 (atanhf, vbslq_f32 (AbsMask, x, halfsign),
-                    vmulq_f32 (halfsign, y), special);
+  y = log1pf_inline (y, &d->log1pf_consts);
+  y = vmulq_f32 (halfsign, y);
+
+  float32x4_t float_one = vreinterpretq_f32_u32 (d->one);
+  /* Need the NOT here to make an input of NaN return true.  */
+  uint32x4_t ret_nan = vmvnq_u32 (vcaltq_f32 (x, float_one));
+  uint32x4_t ret_pinf = vceqzq_f32 (vsubq_f32 (x, float_one));
+  uint32x4_t ret_minf = vceqzq_f32 (vaddq_f32 (x, float_one));
+
+  y = vbslq_f32 (ret_nan, d->nan, y);
+  y = vbslq_f32 (ret_pinf, d->pinf, y);
+  y = vbslq_f32 (ret_minf, d->minf, y);
+  return y;
  }
  
  /* Approximation for vector single-precision atanh(x) using modified log1p.
     The maximum error is 2.93 ULP:
     _ZGVnN4v_atanhf(0x1.f43d7p-5) got 0x1.f4dcfep-5
                                 want 0x1.f4dcf8p-5.  */
-VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
  {
    const struct data *d = ptr_barrier (&data);
  
-  float32x4_t halfsign = vbslq_f32 (AbsMask, v_f32 (0.5), x);
+  /* This computes a 1/2 with the sign bit being the sign bit of x.
+    This is because:
+    atanh(x) = 1/2 ln ((1+x)/(1-x))
+    = 1/2 log1pf ((1+x)/(1-x) - 1)
+    = 1/2 log1pf (2x/(1-x)).  */
+  float32x4_t halfsign = vbslq_f32 (d->abs_mask, d->half, x);
    float32x4_t ax = vabsq_f32 (x);
    uint32x4_t iax = vreinterpretq_u32_f32 (ax);
  
+  /* Values between but not including -1 and 1 are treated appropriately by the
+    fast pass. -1 and 1 are asymptotes of atanh(x), so they are treated as
+    special cases along with any input outside of these bounds.  */
    uint32x4_t special = vcgeq_u32 (iax, d->one);
  
-  float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax),
+  float32x4_t r = vdivq_f32 (vaddq_f32 (ax, ax),
                              vsubq_f32 (vreinterpretq_f32_u32 (d->one), ax));
-  y = log1pf_inline (y, &d->log1pf_consts);
  
-  /* If exceptions not required, pass ax to special-case for shorter dependency
-     chain. If exceptions are required ax will have been zerofied, so have to
-     pass x.  */
    if (__glibc_unlikely (v_any_u32 (special)))
-    return special_case (ax, halfsign, y, special);
+    return special_case (x, r, halfsign, special, d);
+
+  float32x4_t y = log1pf_inline (r, &d->log1pf_consts);
    return vmulq_f32 (halfsign, y);
  }
  libmvec_hidden_def (V_NAME_F1 (atanh))
author	James Chesterman <James.Chesterman@arm.com>
	Fri, 28 Nov 2025 11:18:53 +0000 (11:18 +0000)
committer	Adhemerval Zanella <adhemerval.zanella@linaro.org>
	Thu, 4 Dec 2025 13:54:49 +0000 (10:54 -0300)