static const struct data
{
float64x2_t poly[9];
double half_pi[2];
float64x2_t two_over_pi, shift;
-#if !WANT_SIMD_EXCEPT
float64x2_t range_val;
-#endif
} data = {
/* Coefficients generated using FPMinimax. */
.poly = { V2 (0x1.5555555555556p-2), V2 (0x1.1111111110a63p-3),
.half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 },
.two_over_pi = V2 (0x1.45f306dc9c883p-1),
.shift = V2 (0x1.8p52),
-#if !WANT_SIMD_EXCEPT
.range_val = V2 (0x1p23),
-#endif
};
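/* two_over_pi is 2/pi for computing the quadrant, and shift = 0x1.8p52
   (1.5 * 2^52) implements round-to-nearest: for |v| < 2^51,
   (v + 0x1.8p52) - 0x1.8p52 rounds v to the nearest integer via the
   addition's FP rounding, e.g. v = 2.7 becomes 3.0.  */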
#define RangeVal 0x4160000000000000 /* asuint64(0x1p23). */
#define TinyBound 0x3e50000000000000 /* asuint64(2^-26). */
-#define Thresh 0x310000000000000 /* RangeVal - TinyBound. */
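/* In unsigned arithmetic iax - TinyBound wraps around for iax < TinyBound,
   so a single compare against Thresh catches |x| < 2^-26 as well as
   |x| > 0x1p23, Inf and NaN.  */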
/* Special cases (fall back to scalar calls). */
static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x)
+special_case (float64x2_t x, float64x2_t n, float64x2_t d, uint64x2_t special)
{
- return v_call_f64 (tan, x, x, v_u64 (-1));
+ return v_call_f64 (tan, x, vdivq_f64 (n, d), special);
}
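/* Assuming glibc's v_call_f64 helper semantics: the scalar routine is
   applied only on the lanes selected by the special mask, while the
   remaining lanes keep the vector result vdivq_f64 (n, d).  */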
/* Vector approximation for double-precision tan.  The fast range reduction
   is not accurate enough for very large inputs.  Fall back to scalar routine
   for all lanes if any are too large, or Inf/NaN.  If fenv exceptions are
   expected, also fall back for tiny input to avoid underflow.  */
float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
{
const struct data *dat = ptr_barrier (&data);
-#if WANT_SIMD_EXCEPT
- uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
- /* iax - tiny_bound > range_val - tiny_bound. */
- uint64x2_t special
- = vcgtq_u64 (vsubq_u64 (iax, v_u64 (TinyBound)), v_u64 (Thresh));
- if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (x);
-#endif
/* q = nearest integer to 2 * x / pi. */
float64x2_t q
  = vsubq_f64 (vfmaq_f64 (dat->shift, x, dat->two_over_pi), dat->shift);
int64x2_t qi = vcvtq_s64_f64 (q);
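/* For example, x = pi gives 2 * x / pi = 2.0, so q = 2 and qi = 2; the
   reduction below then brings r close to zero.  */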
/* Use q to reduce x to r in [-pi/4, pi/4], by:
r = x - q * pi/2, in extended precision. */
- float64x2_t r = x;
float64x2_t half_pi = vld1q_f64 (dat->half_pi);
- r = vfmsq_laneq_f64 (r, q, half_pi, 0);
+ float64x2_t r = vfmsq_laneq_f64 (x, q, half_pi, 0);
r = vfmsq_laneq_f64 (r, q, half_pi, 1);
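/* half_pi lane 0 is pi/2 rounded to double and lane 1 the residual
   pi/2 - half_pi[0]; subtracting q * hi and then q * lo with fused
   multiply-subtract keeps the reduction accurate well beyond 53 bits for
   moderate q.  */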
/* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
formula. */
r = vmulq_n_f64 (r, 0.5);

/* Approximate tan(r) with an odd polynomial in r, accumulating the result
   in p: tan(r) ~ r + r^3 * (C0 + r^2 * P(r)), with P evaluated from C1
   onwards.  */
float64x2_t r2 = vmulq_f64 (r, r), r4 = vmulq_f64 (r2, r2),
  r8 = vmulq_f64 (r4, r4);
float64x2_t p = v_estrin_7_f64 (r2, r4, r8, dat->poly + 1);
p = vfmaq_f64 (dat->poly[0], p, r2);
p = vfmaq_f64 (r, r2, vmulq_f64 (p, r));

/* Recombine using the double-angle formula tan(2r) = 2p / (1 - p^2) and,
   for odd q, reciprocity around pi/2, selecting n = p^2 - 1 and d = 2p
   into numerator and denominator according to the parity of q.  */
uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1));
float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p);
float64x2_t d = vaddq_f64 (p, p);
-#if !WANT_SIMD_EXCEPT
uint64x2_t special = vcageq_f64 (x, dat->range_val);
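/* vcageq_f64 is an absolute compare, |x| >= range_val, flagging lanes with
   |x| >= 0x1p23 (including Inf) for the scalar fallback.  */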
+ float64x2_t swap = vbslq_f64 (no_recip, n, vnegq_f64 (d));
+ d = vbslq_f64 (no_recip, d, n);
+ n = swap;
+
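/* The three lines above conditionally negate and swap n and d per lane:
   for odd q (no_recip set) the result below is n / d = (p^2 - 1) / (2p),
   i.e. -1 / tan(2r); for even q it is -d / n = 2p / (1 - p^2) = tan(2r).
   Doing the selection before the special check lets special_case reuse the
   same numerator and denominator.  */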
if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (x);
-#endif
+ return special_case (x, n, d, special);
- return vdivq_f64 (vbslq_f64 (no_recip, n, vnegq_f64 (d)),
- vbslq_f64 (no_recip, d, n));
+ return vdivq_f64 (n, d);
}