AArch64: Fix instability in AdvSIMD tan

author Joe Ramsay <Joe.Ramsay@arm.com>

Thu, 6 Nov 2025 18:26:54 +0000 (18:26 +0000)

committer Wilco Dijkstra <wilco.dijkstra@arm.com>

Thu, 6 Nov 2025 18:30:47 +0000 (18:30 +0000)
author Joe Ramsay <Joe.Ramsay@arm.com>
Thu, 6 Nov 2025 18:26:54 +0000 (18:26 +0000)
committer Wilco Dijkstra <wilco.dijkstra@arm.com>
Thu, 6 Nov 2025 18:30:47 +0000 (18:30 +0000)
diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c

index 825c9754b362de012b7da74e1a2951fde5578075..d391a003d85d77f0243f9439e487a92218dfd148 100644 (file)
--- a/sysdeps/aarch64/fpu/tan_advsimd.c
+++ b/sysdeps/aarch64/fpu/tan_advsimd.c
@@ -25,9 +25,7 @@ static const struct data
    float64x2_t poly[9];
    double half_pi[2];
    float64x2_t two_over_pi, shift;
-#if !WANT_SIMD_EXCEPT
    float64x2_t range_val;
-#endif
  } data = {
    /* Coefficients generated using FPMinimax.  */
    .poly = { V2 (0x1.5555555555556p-2), V2 (0x1.1111111110a63p-3),
@@ -38,20 +36,17 @@ static const struct data
    .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 },
    .two_over_pi = V2 (0x1.45f306dc9c883p-1),
    .shift = V2 (0x1.8p52),
-#if !WANT_SIMD_EXCEPT
    .range_val = V2 (0x1p23),
-#endif
  };
  
  #define RangeVal 0x4160000000000000  /* asuint64(0x1p23).  */
  #define TinyBound 0x3e50000000000000 /* asuint64(2^-26).  */
-#define Thresh 0x310000000000000     /* RangeVal - TinyBound.  */
  
  /* Special cases (fall back to scalar calls).  */
  static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x)
+special_case (float64x2_t x, float64x2_t n, float64x2_t d, uint64x2_t special)
  {
-  return v_call_f64 (tan, x, x, v_u64 (-1));
+  return v_call_f64 (tan, x, vdivq_f64 (n, d), special);
  }
  
  /* Vector approximation for double-precision tan.
@@ -65,14 +60,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
       very large inputs. Fall back to scalar routine for all lanes if any are
       too large, or Inf/NaN. If fenv exceptions are expected, also fall back for
       tiny input to avoid underflow.  */
-#if WANT_SIMD_EXCEPT
-  uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
-  /* iax - tiny_bound > range_val - tiny_bound.  */
-  uint64x2_t special
-      = vcgtq_u64 (vsubq_u64 (iax, v_u64 (TinyBound)), v_u64 (Thresh));
-  if (__glibc_unlikely (v_any_u64 (special)))
-    return special_case (x);
-#endif
  
    /* q = nearest integer to 2 * x / pi.  */
    float64x2_t q
@@ -81,9 +68,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
  
    /* Use q to reduce x to r in [-pi/4, pi/4], by:
       r = x - q * pi/2, in extended precision.  */
-  float64x2_t r = x;
    float64x2_t half_pi = vld1q_f64 (dat->half_pi);
-  r = vfmsq_laneq_f64 (r, q, half_pi, 0);
+  float64x2_t r = vfmsq_laneq_f64 (x, q, half_pi, 0);
    r = vfmsq_laneq_f64 (r, q, half_pi, 1);
    /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
       formula.  */
@@ -114,12 +100,13 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
  
    uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1));
  
-#if !WANT_SIMD_EXCEPT
    uint64x2_t special = vcageq_f64 (x, dat->range_val);
+  float64x2_t swap = vbslq_f64 (no_recip, n, vnegq_f64 (d));
+  d = vbslq_f64 (no_recip, d, n);
+  n = swap;
+
    if (__glibc_unlikely (v_any_u64 (special)))
-    return special_case (x);
-#endif
+    return special_case (x, n, d, special);
  
-  return vdivq_f64 (vbslq_f64 (no_recip, n, vnegq_f64 (d)),
-                   vbslq_f64 (no_recip, d, n));
+  return vdivq_f64 (n, d);
  }
author	Joe Ramsay <Joe.Ramsay@arm.com>
	Thu, 6 Nov 2025 18:26:54 +0000 (18:26 +0000)
committer	Wilco Dijkstra <wilco.dijkstra@arm.com>
	Thu, 6 Nov 2025 18:30:47 +0000 (18:30 +0000)