aarch64: Optimise AdvSIMD log1pf

author James Chesterman <James.Chesterman@arm.com>

Wed, 19 Nov 2025 14:11:37 +0000 (14:11 +0000)

committer Adhemerval Zanella <adhemerval.zanella@linaro.org>

Thu, 4 Dec 2025 11:31:02 +0000 (08:31 -0300)
author James Chesterman <James.Chesterman@arm.com>
Wed, 19 Nov 2025 14:11:37 +0000 (14:11 +0000)
committer Adhemerval Zanella <adhemerval.zanella@linaro.org>
Thu, 4 Dec 2025 11:31:02 +0000 (08:31 -0300)
diff --git a/sysdeps/aarch64/fpu/log1pf_advsimd.c b/sysdeps/aarch64/fpu/log1pf_advsimd.c

index 79e33e4aae974117f62926a1d80c66fe8129a847..537de5d511e677ed9684906ac62f4a60d2828c1c 100644 (file)
--- a/sysdeps/aarch64/fpu/log1pf_advsimd.c
+++ b/sysdeps/aarch64/fpu/log1pf_advsimd.c
@@ -20,28 +20,50 @@
  #include "v_math.h"
  #include "v_log1pf_inline.h"
  
-const static struct v_log1pf_data data = V_LOG1PF_CONSTANTS_TABLE;
+static const struct data
+{
+  struct v_log1pf_data d;
+  float32x4_t nan, pinf, minf;
+} data = {
+  .d = V_LOG1PF_CONSTANTS_TABLE,
+  .nan = V4 (NAN),
+  .pinf = V4 (INFINITY),
+  .minf = V4 (-INFINITY),
+};
  
-static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, uint32x4_t cmp)
+static inline float32x4_t
+special_case (float32x4_t x, uint32x4_t cmp, const struct data *d)
  {
-  return v_call_f32 (log1pf, x, log1pf_inline (x, ptr_barrier (&data)), cmp);
+  float32x4_t y = log1pf_inline (x, ptr_barrier (&d->d));
+  y = vbslq_f32 (cmp, d->nan, y);
+  uint32x4_t ret_pinf = vceqq_f32 (x, d->pinf);
+  uint32x4_t ret_minf = vceqq_f32 (x, v_f32 (-1.0));
+
+  y = vbslq_f32 (ret_pinf, d->pinf, y);
+  return vbslq_f32 (ret_minf, d->minf, y);
  }
  
-/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
-   error is 1.63 ULP:
-   _ZGVnN4v_log1pf(0x1.216d12p-2) got 0x1.fdcb12p-3
-                                want 0x1.fdcb16p-3.  */
+/* Single-precision implementation of vector log1pf(x).
+  Maximum observed error: 1.20 + 0.5
+  _ZGVnN4v_log1pf(0x1.04418ap-2) got 0x1.cfcbd8p-3
+                               want 0x1.cfcbdcp-3.  */
  float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log1p) (float32x4_t x)
  {
-  uint32x4_t special_cases = vornq_u32 (vcleq_f32 (x, v_f32 (-1)),
-                                       vcaleq_f32 (x, v_f32 (0x1p127f)));
+  const struct data *d = ptr_barrier (&data);
  
-  if (__glibc_unlikely (v_any_u32 (special_cases)))
-    return special_case (x, special_cases);
+  /* Use signed integers here to ensure that negative numbers between 0 and -1
+    don't make this expression true.  */
+  uint32x4_t is_infnan
+      = vcgeq_s32 (vreinterpretq_s32_f32 (x), vreinterpretq_s32_f32 (d->pinf));
+  /* The OR-NOT is needed to catch -NaN.  */
+  uint32x4_t special = vornq_u32 (is_infnan, vcgtq_f32 (x, v_f32 (-1)));
  
-  return log1pf_inline (x, ptr_barrier (&data));
+  if (__glibc_unlikely (v_any_u32 (special)))
+    return special_case (x, special, d);
+
+  return log1pf_inline (x, &d->d);
  }
+
  libmvec_hidden_def (V_NAME_F1 (log1p))
  HALF_WIDTH_ALIAS_F1 (log1p)
  strong_alias (V_NAME_F1 (log1p), V_NAME_F1 (logp1))
author	James Chesterman <James.Chesterman@arm.com>
	Wed, 19 Nov 2025 14:11:37 +0000 (14:11 +0000)
committer	Adhemerval Zanella <adhemerval.zanella@linaro.org>
	Thu, 4 Dec 2025 11:31:02 +0000 (08:31 -0300)