git.ipfire.org Git - thirdparty/glibc.git/commitdiff
math: Use atanpif from CORE-MATH
author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Fri, 20 Dec 2024 16:18:32 +0000 (13:18 -0300)
committer: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Wed, 12 Feb 2025 19:31:57 +0000 (16:31 -0300)
The CORE-MATH implementation is correctly rounded (for any rounding mode)
and shows better performance than the generic atanpif.

The code was adapted to glibc style and to use the definitions from
math_config.h (to handle errno, overflow, and underflow).

Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (Neoverse-N1,
gcc 13.3.1), and powerpc (POWER10, gcc 13.2.1):

latency                     master        patched   improvement
x86_64                     66.3296        52.7558        20.46%
x86_64v2                   66.0429        51.4007        22.17%
x86_64v3                   60.6294        48.7876        19.53%
aarch64 (Neoverse)         24.3163        20.9110        14.00%
power8                     16.5766        13.3620        19.39%
power10                    16.5115        13.4072        18.80%

reciprocal-throughput       master        patched   improvement
x86_64                     30.8599        16.0866        47.87%
x86_64v2                   29.2286        15.4688        47.08%
x86_64v3                   23.0960        12.8510        44.36%
aarch64 (Neoverse)         15.4619        10.6752        30.96%
power8                      7.9200         5.2483        33.73%
power10                     6.8539         4.6262        32.50%

Reviewed-by: DJ Delorie <dj@redhat.com>
21 files changed:
SHARED-FILES
math/auto-libm-test-in
math/auto-libm-test-out-atanpi
sysdeps/aarch64/libm-test-ulps
sysdeps/arc/fpu/libm-test-ulps
sysdeps/arc/nofpu/libm-test-ulps
sysdeps/arm/libm-test-ulps
sysdeps/hppa/fpu/libm-test-ulps
sysdeps/i386/fpu/libm-test-ulps
sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
sysdeps/ieee754/flt-32/s_atanpif.c [new file with mode: 0644]
sysdeps/loongarch/lp64/libm-test-ulps
sysdeps/mips/mips64/libm-test-ulps
sysdeps/or1k/fpu/libm-test-ulps
sysdeps/or1k/nofpu/libm-test-ulps
sysdeps/powerpc/fpu/libm-test-ulps
sysdeps/riscv/nofpu/libm-test-ulps
sysdeps/riscv/rvd/libm-test-ulps
sysdeps/s390/fpu/libm-test-ulps
sysdeps/sparc/fpu/libm-test-ulps
sysdeps/x86_64/fpu/libm-test-ulps

index b403a2a6f010131e0bfd7ff5ff4174f614c627a5..5702a2d1c33c89c6afe5a29dfbe314fcf2978c31 100644 (file)
@@ -346,3 +346,7 @@ sysdeps/ieee754/flt-32/s_atan2pif.c:
   (src/binary32/atan2pi/atan2pif.c in CORE-MATH)
   - the code was adapted to use glibc code style and internal
     functions to handle errno, overflow, and underflow.
+sysdeps/ieee754/flt-32/s_atanpif.c:
+  (src/binary32/atanpi/atanpif.c in CORE-MATH)
+  - the code was adapted to use glibc code style and internal
+    functions to handle errno, overflow, and underflow.
index 4f194da19d43a442a2c65507b62eb60f2f667fba..7dd9cde9c9c2d4addb8cb6ec0924508863e96e83 100644 (file)
@@ -936,6 +936,7 @@ atanpi -0x3.eb8e18p+0
 atanpi 0x3.53c188p+0
 atanpi -0x1.58c83p+0
 atanpi 0x1.626772p-1
+atanpi 0x1p-126
 atanpi min
 atanpi -min
 atanpi min_subnorm
index 9f9ded81d1404fac0b5e17a0a89ae9f092b1fc46..4d3454b4ae9213f9f83c2b58f2edb289f7766c6a 100644 (file)
@@ -1061,6 +1061,31 @@ atanpi 0x1.626772p-1
 = atanpi tonearest ibm128 0xb.133b9p-4 : 0x3.156898f544dca52f89f818e127p-4 : inexact-ok
 = atanpi towardzero ibm128 0xb.133b9p-4 : 0x3.156898f544dca52f89f818e127p-4 : inexact-ok
 = atanpi upward ibm128 0xb.133b9p-4 : 0x3.156898f544dca52f89f818e128p-4 : inexact-ok
+atanpi 0x1p-126
+= atanpi downward binary32 0x4p-128 : 0x1.45f3p-128 : inexact-ok underflow errno-erange-ok
+= atanpi tonearest binary32 0x4p-128 : 0x1.45f308p-128 : inexact-ok underflow errno-erange-ok
+= atanpi towardzero binary32 0x4p-128 : 0x1.45f3p-128 : inexact-ok underflow errno-erange-ok
+= atanpi upward binary32 0x4p-128 : 0x1.45f308p-128 : inexact-ok underflow errno-erange-ok
+= atanpi downward binary64 0x4p-128 : 0x1.45f306dc9c882p-128 : inexact-ok
+= atanpi tonearest binary64 0x4p-128 : 0x1.45f306dc9c883p-128 : inexact-ok
+= atanpi towardzero binary64 0x4p-128 : 0x1.45f306dc9c882p-128 : inexact-ok
+= atanpi upward binary64 0x4p-128 : 0x1.45f306dc9c883p-128 : inexact-ok
+= atanpi downward intel96 0x4p-128 : 0x1.45f306dc9c882a52p-128 : inexact-ok
+= atanpi tonearest intel96 0x4p-128 : 0x1.45f306dc9c882a54p-128 : inexact-ok
+= atanpi towardzero intel96 0x4p-128 : 0x1.45f306dc9c882a52p-128 : inexact-ok
+= atanpi upward intel96 0x4p-128 : 0x1.45f306dc9c882a54p-128 : inexact-ok
+= atanpi downward m68k96 0x4p-128 : 0x1.45f306dc9c882a52p-128 : inexact-ok
+= atanpi tonearest m68k96 0x4p-128 : 0x1.45f306dc9c882a54p-128 : inexact-ok
+= atanpi towardzero m68k96 0x4p-128 : 0x1.45f306dc9c882a52p-128 : inexact-ok
+= atanpi upward m68k96 0x4p-128 : 0x1.45f306dc9c882a54p-128 : inexact-ok
+= atanpi downward binary128 0x4p-128 : 0x1.45f306dc9c882a53f84eafa3ea69p-128 : inexact-ok
+= atanpi tonearest binary128 0x4p-128 : 0x1.45f306dc9c882a53f84eafa3ea6ap-128 : inexact-ok
+= atanpi towardzero binary128 0x4p-128 : 0x1.45f306dc9c882a53f84eafa3ea69p-128 : inexact-ok
+= atanpi upward binary128 0x4p-128 : 0x1.45f306dc9c882a53f84eafa3ea6ap-128 : inexact-ok
+= atanpi downward ibm128 0x4p-128 : 0x1.45f306dc9c882a53f84eafa3eap-128 : inexact-ok
+= atanpi tonearest ibm128 0x4p-128 : 0x1.45f306dc9c882a53f84eafa3ea8p-128 : inexact-ok
+= atanpi towardzero ibm128 0x4p-128 : 0x1.45f306dc9c882a53f84eafa3eap-128 : inexact-ok
+= atanpi upward ibm128 0x4p-128 : 0x1.45f306dc9c882a53f84eafa3ea8p-128 : inexact-ok
 atanpi min
 = atanpi downward binary32 0x4p-128 : 0x1.45f3p-128 : inexact-ok underflow errno-erange-ok
 = atanpi tonearest binary32 0x4p-128 : 0x1.45f308p-128 : inexact-ok underflow errno-erange-ok
index be29b377215f41ce57069521a83490164dc12e83..10f182a2118a9e0a725b94566ef2d320e8e3ce44 100644 (file)
@@ -218,22 +218,18 @@ ldouble: 4
 
 Function: "atanpi":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cabs":
index 1383c88b95535d41b6650a08caaf0346edd34b5c..7fb407cecddd5ca1b8df77084567744da0500231 100644 (file)
@@ -123,19 +123,15 @@ double: 3
 
 Function: "atanpi":
 double: 2
-float: 1
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 
 Function: "cabs":
 double: 1
index 9028f5cbe750c244ce06d99662ed546f6e01a693..1859c2bd4fec01bace388119028cf244b64a6387 100644 (file)
@@ -30,7 +30,6 @@ double: 2
 
 Function: "atanpi":
 double: 2
-float: 1
 
 Function: "cabs":
 double: 1
index e1c538f79fd99f883f2d2277a6036ecf4cef30c2..fa9864adee2e9a9a95d2e958a105f2eaea6fed4b 100644 (file)
@@ -120,19 +120,15 @@ double: 3
 
 Function: "atanpi":
 double: 2
-float: 1
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 
 Function: "cabs":
 double: 1
index 796da7b5aba0b791460476d04b7fad0720658d0b..a59f61fc4ebb4fdaef6e0dcfac80671f35086096 100644 (file)
@@ -120,19 +120,15 @@ double: 3
 
 Function: "atanpi":
 double: 2
-float: 1
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 
 Function: "cabs":
 double: 1
index 4f687c762b14a97b70db02ba9f3fd606e45f707f..8aa52f46007a01657d0e6c28b65b1a472cc36886 100644 (file)
@@ -201,25 +201,21 @@ ldouble: 5
 
 Function: "atanpi":
 double: 1
-float: 1
 float128: 2
 ldouble: 1
 
 Function: "atanpi_downward":
 double: 2
-float: 2
 float128: 1
 ldouble: 2
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 float128: 1
 ldouble: 2
 
 Function: "atanpi_upward":
 double: 2
-float: 1
 float128: 2
 ldouble: 1
 
index f24c87b3026342cf7495af641dfdb533ee67be50..80326368088f71db080c9377e104c6206e6c2367 100644 (file)
@@ -201,25 +201,21 @@ ldouble: 5
 
 Function: "atanpi":
 double: 1
-float: 1
 float128: 2
 ldouble: 2
 
 Function: "atanpi_downward":
 double: 2
-float: 2
 float128: 1
 ldouble: 2
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 float128: 1
 ldouble: 2
 
 Function: "atanpi_upward":
 double: 2
-float: 1
 float128: 2
 ldouble: 1
 
diff --git a/sysdeps/ieee754/flt-32/s_atanpif.c b/sysdeps/ieee754/flt-32/s_atanpif.c
new file mode 100644 (file)
index 0000000..8d78163
--- /dev/null
@@ -0,0 +1,109 @@
+/* Correctly-rounded half-revolution arc-tangent of binary32 value.
+
+Copyright (c) 2022-2025 Alexei Sibidanov.
+
+The original version of this file was copied from the CORE-MATH
+project (file src/binary32/atanpi/atanpif.c, revision e02000e).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+*/
+
+#include <errno.h>
+#include <math.h>
+#include <stdint.h>
+#include <libm-alias-float.h>
+#include "math_config.h"
+
+/* Return atan (x) / pi for a binary32 argument X.
+
+   The input is dispatched on the biased exponent field E of X:
+
+   - E > 127 + 24 (|x| >= 2^25, infinities and NaNs): a NaN is
+     propagated through x + x, an infinity gives +/-0.5, and finite
+     values give +/-0.5 minus a small correction of the same sign.
+   - E < 127 - 13 (|x| < 2^-13): x is scaled in double by
+     0x1.45f306dc9c883p-2 (1/pi rounded to double); a cubic correction
+     is applied unless E < 127 - 25.  errno is set to ERANGE when a
+     nonzero X produces a zero float result.
+   - otherwise: three bit patterns of |x| are answered with hard-coded
+     values, and every remaining input is evaluated in double as a
+     quotient of two polynomials in z^2, where z is x for |x| < 1 and
+     1 / x for |x| >= 1.  */
+float
+__atanpif (float x)
+{
+  uint32_t t = asuint (x);
+  /* Biased exponent field of the binary32 encoding.  */
+  int32_t e = (t >> 23) & 0xff;
+  /* True when |x| >= 1; selects the 1 / x reduction further down.  */
+  bool gt = e >= 127;
+  if (__glibc_unlikely (e > 127 + 24))
+    {
+      float f = copysignf (0.5f, x);
+      if (__glibc_unlikely (e == 0xff))
+       {
+         /* A nonzero fraction field with an all-ones exponent is a NaN.  */
+         if (t << 9)
+           return x + x; /* nan */
+         return f;       /* inf */
+       }
+      /* Warning: 0x1.45f306p-2f / x underflows for |x| >= 0x1.45f306p+124 */
+      if (fabsf (x) >= 0x1.45f306p+124f)
+       /* 2^-26 is half the spacing of the floats just below 0.5, so the
+          subtraction only steers the rounding of +/-0.5.  Use the float
+          variant of copysign, like the other float sign transfers in
+          this function, instead of promoting the operands to double.  */
+       return f - copysignf (0x1p-26f, x);
+      else
+       return f - 0x1.45f306p-2f / x;
+    }
+  double z = x;
+  if (__glibc_unlikely (e < 127 - 13))
+    {
+      /* sx = x / pi, computed in double.  */
+      double sx = z * 0x1.45f306dc9c883p-2;
+      if (__glibc_unlikely (e < 127 - 25))
+       {
+         /* Narrow to float here so that a result that rounds to zero
+            can be detected and reported through errno.  */
+         float rsx = sx;
+         if (x != 0 && rsx == 0)
+           __set_errno (ERANGE);
+         return rsx;
+       }
+      /* sx * (1 - x^2 / 3); 0x1.5555555555555p-2 is 1/3 rounded to
+         double.  */
+      return sx - (0x1.5555555555555p-2 * sx) * (x * x);
+    }
+  /* Bit pattern of |x| (sign bit cleared).  */
+  uint32_t ax = t & (~0u >> 1);
+  /* Hard-coded results for individual inputs.  NOTE(review): the first
+     two are presumably cases the generic evaluation below does not
+     round correctly -- confirm against the CORE-MATH sources.  The tiny
+     second term makes the result depend on the rounding mode.  */
+  if (__glibc_unlikely (ax == 0x3fa267ddu))
+    return copysignf (0x1.267004p-2f, x) - copysignf (0x1p-55f, x);
+  if (__glibc_unlikely (ax == 0x3f693531u))
+    return copysignf (0x1.e1a662p-3f, x) + copysignf (0x1p-28f, x);
+  /* |x| == 1 gives exactly +/-0.25.  */
+  if (__glibc_unlikely (ax == 0x3f800000u))
+    return copysignf (0x1p-2f, x);
+  /* For |x| >= 1 evaluate at 1 / x; the result is folded back at the
+     end with +/-0.5 - r.  */
+  if (gt)
+    z = 1 / z;
+  double z2 = z * z;
+  double z4 = z2 * z2;
+  double z8 = z4 * z4;
+  /* Numerator coefficients, lowest degree first (polynomial in z2).  */
+  static const double cn[] =
+    {
+      0x1.45f306dc9c882p-2, 0x1.733b561bc23d5p-1, 0x1.28d9805bdfbf2p-1,
+      0x1.8c3ba966ae287p-3, 0x1.94a7f81ee634bp-6, 0x1.a6bbf6127a6dfp-11
+    };
+  /* Denominator coefficients, lowest degree first (polynomial in z2).  */
+  static const double cd[] =
+    {
+      0x1p+0,               0x1.4e3b3ecc2518fp+1, 0x1.3ef4a360ff063p+1,
+      0x1.0f1dc55bad551p+0, 0x1.8da0fecc018a4p-3, 0x1.8fa87803776bfp-7,
+      0x1.dadf2ca0acb43p-14
+    };
+  /* Numerator: pairs of coefficients are combined first and then
+     merged using z4 and z8; the final multiply by z makes it odd.  */
+  double cn0 = cn[0] + z2 * cn[1];
+  double cn2 = cn[2] + z2 * cn[3];
+  double cn4 = cn[4] + z2 * cn[5];
+  cn0 += z4 * cn2;
+  cn0 += z8 * cn4;
+  cn0 *= z;
+  /* Denominator: same pairing scheme, with the extra top coefficient
+     cd[6] merged into the cd4 pair before the z8 step.  */
+  double cd0 = cd[0] + z2 * cd[1];
+  double cd2 = cd[2] + z2 * cd[3];
+  double cd4 = cd[4] + z2 * cd[5];
+  double cd6 = cd[6];
+  cd0 += z4 * cd2;
+  cd4 += z4 * cd6;
+  cd0 += z8 * cd4;
+  double r = cn0 / cd0;
+  /* Undo the 1 / x reduction: the result is +/-0.5 - r, with the sign
+     taken from z (which has the sign of x).  */
+  if (gt)
+    r = copysign (0.5, z) - r;
+  /* The double result is rounded to float by the return conversion.  */
+  return r;
+}
+libm_alias_float (__atanpi, atanpi)
index d5adc119cfd0c96c788741350468d80ef7018aa5..0cac55cbe41bef92f9f2cdd5b4cb9143bf9b370d 100644 (file)
@@ -162,22 +162,18 @@ ldouble: 4
 
 Function: "atanpi":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cabs":
index c901b00f2039295a8c145e6d972dcb1f594b3c0d..1b5bcff11e14ca57a769d748ed507fe49b2ccea5 100644 (file)
@@ -162,22 +162,18 @@ ldouble: 4
 
 Function: "atanpi":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cabs":
index 9934382bde6220d301ac3a14b48804be7b497423..a608e3c949af10ba92268dbd402434f4b89e7a78 100644 (file)
@@ -120,19 +120,15 @@ double: 3
 
 Function: "atanpi":
 double: 2
-float: 1
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 
 Function: "cabs":
 double: 1
index 7ff5ee4425bdc12464471e01d14cfa62727c3308..56986f0be02744ecedea8c60c8bf81f719a457e4 100644 (file)
@@ -93,7 +93,6 @@ double: 3
 
 Function: "atanpi":
 double: 2
-float: 1
 
 Function: "cabs":
 double: 1
index b1c01b4d94f81e450ac348861ff66a61122d971d..630111e6c45a468c792fc5326d359addf5db6582 100644 (file)
@@ -206,25 +206,21 @@ ldouble: 4
 
 Function: "atanpi":
 double: 2
-float: 1
 float128: 2
 ldouble: 1
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 float128: 1
 ldouble: 2
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 float128: 1
 ldouble: 3
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 float128: 2
 ldouble: 5
 
index f55df65c6a454a9db794e25e403aa21429dc0751..087dcd79fc849b471623ce182b89856f9e15bcb5 100644 (file)
@@ -126,7 +126,6 @@ ldouble: 4
 
 Function: "atanpi":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "cabs":
index 879f5c5669bb584368782e7d1ea777e86f4e068e..efd83affa498f690788a2da1dc944ffc188284fa 100644 (file)
@@ -166,22 +166,18 @@ ldouble: 4
 
 Function: "atanpi":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cabs":
index c4a27b96ad1fde71f951b051aa02fe85c500c17b..709debb205bb5b8fd15e833637324efd8192fa57 100644 (file)
@@ -162,22 +162,18 @@ ldouble: 4
 
 Function: "atanpi":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cabs":
index fbf1507bd97ece6a819768228fbb15364a79565e..becf5da3d61529a97c7f8c3f63d34e72f62279ac 100644 (file)
@@ -162,22 +162,18 @@ ldouble: 4
 
 Function: "atanpi":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cabs":
index a340df62434ba1eaabe6b7bda6c5bb1965460a05..8c5d4fd4710d4f3516d9507417b72e9823021bfc 100644 (file)
@@ -339,25 +339,21 @@ float: 1
 
 Function: "atanpi":
 double: 2
-float: 1
 float128: 2
 ldouble: 2
 
 Function: "atanpi_downward":
 double: 1
-float: 2
 float128: 1
 ldouble: 2
 
 Function: "atanpi_towardzero":
 double: 1
-float: 2
 float128: 1
 ldouble: 2
 
 Function: "atanpi_upward":
 double: 1
-float: 1
 float128: 2
 ldouble: 1