X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=sysdeps%2Faarch64%2Ffpu%2Ftan_advsimd.c;h=0459821ab25487a8194d0a5dbabf0e31daad389e;hb=e302e1021391d13a9611ba3a910df128830bd19e;hp=d7e5ba7b1ab941e8daabcadea9d6c324652f67c0;hpb=02782fd12849b6673cb5c2728cb750e8ec295aa3;p=thirdparty%2Fglibc.git diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c index d7e5ba7b1a..0459821ab2 100644 --- a/sysdeps/aarch64/fpu/tan_advsimd.c +++ b/sysdeps/aarch64/fpu/tan_advsimd.c @@ -23,7 +23,7 @@ static const struct data { float64x2_t poly[9]; - float64x2_t half_pi_hi, half_pi_lo, two_over_pi, shift; + float64x2_t half_pi, two_over_pi, shift; #if !WANT_SIMD_EXCEPT float64x2_t range_val; #endif @@ -34,8 +34,7 @@ static const struct data V2 (0x1.226e5e5ecdfa3p-7), V2 (0x1.d6c7ddbf87047p-9), V2 (0x1.7ea75d05b583ep-10), V2 (0x1.289f22964a03cp-11), V2 (0x1.4e4fd14147622p-12) }, - .half_pi_hi = V2 (0x1.921fb54442d18p0), - .half_pi_lo = V2 (0x1.1a62633145c07p-54), + .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 }, .two_over_pi = V2 (0x1.45f306dc9c883p-1), .shift = V2 (0x1.8p52), #if !WANT_SIMD_EXCEPT @@ -56,15 +55,15 @@ special_case (float64x2_t x) /* Vector approximation for double-precision tan. Maximum measured error is 3.48 ULP: - __v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 - want -0x1.f6ccd8ecf7deap+37. */ + _ZGVnN2v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 + want -0x1.f6ccd8ecf7deap+37. */ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) { const struct data *dat = ptr_barrier (&data); - /* Our argument reduction cannot calculate q with sufficient accuracy for very - large inputs. Fall back to scalar routine for all lanes if any are too - large, or Inf/NaN. If fenv exceptions are expected, also fall back for tiny - input to avoid underflow. */ + /* Our argument reduction cannot calculate q with sufficient accuracy for + very large inputs. Fall back to scalar routine for all lanes if any are + too large, or Inf/NaN. If fenv exceptions are expected, also fall back for + tiny input to avoid underflow. */ #if WANT_SIMD_EXCEPT uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); /* iax - tiny_bound > range_val - tiny_bound. */ @@ -82,8 +81,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) /* Use q to reduce x to r in [-pi/4, pi/4], by: r = x - q * pi/2, in extended precision. */ float64x2_t r = x; - r = vfmsq_f64 (r, q, dat->half_pi_hi); - r = vfmsq_f64 (r, q, dat->half_pi_lo); + r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0); + r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1); /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle formula. */ r = vmulq_n_f64 (r, 0.5); @@ -106,14 +105,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) and reciprocity around pi/2: tan(x) = 1 / (tan(pi/2 - x)) to assemble result using change-of-sign and conditional selection of - numerator/denominator, dependent on odd/even-ness of q (hence quadrant). */ + numerator/denominator, dependent on odd/even-ness of q (hence quadrant). + */ float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p); float64x2_t d = vaddq_f64 (p, p); uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1)); #if !WANT_SIMD_EXCEPT - uint64x2_t special = vceqzq_u64 (vcaleq_f64 (x, dat->range_val)); + uint64x2_t special = vcageq_f64 (x, dat->range_val); if (__glibc_unlikely (v_any_u64 (special))) return special_case (x); #endif