]> git.ipfire.org Git - thirdparty/glibc.git/commitdiff
math: Use cospif from CORE-MATH
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>
Fri, 20 Dec 2024 16:25:39 +0000 (13:25 -0300)
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>
Wed, 12 Feb 2025 19:31:57 +0000 (16:31 -0300)
The CORE-MATH implementation is correctly rounded (for any rounding mode)
and shows better performance than the generic cospif.

The code was adapted to glibc style and to use the definitions from
math_config.h (to handle errno, overflow, and underflow).

Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (Neoverse-N1,
gcc 13.3.1), and powerpc (POWER10, gcc 13.2.1):

latency                    master        patched   improvement
x86_64                    47.4679        38.4157        19.07%
x86_64v2                  46.9686        38.3329        18.39%
x86_64v3                  43.8929        31.8510        27.43%
aarch64 (Neoverse)        18.8867        13.2089        30.06%
power8                    22.9435         7.8023        65.99%
power10                   15.4472        7.77505        49.67%

reciprocal-throughput      master        patched   improvement
x86_64                    20.9518        11.4991        45.12%
x86_64v2                  19.8699        10.5921        46.69%
x86_64v3                  19.3475         9.3998        51.42%
aarch64 (Neoverse)        12.5767         6.2158        50.58%
power8                    15.0566         3.2654        78.31%
power10                    9.2866         3.1147        66.46%

Reviewed-by: DJ Delorie <dj@redhat.com>
19 files changed:
SHARED-FILES
sysdeps/aarch64/libm-test-ulps
sysdeps/arc/fpu/libm-test-ulps
sysdeps/arc/nofpu/libm-test-ulps
sysdeps/arm/libm-test-ulps
sysdeps/hppa/fpu/libm-test-ulps
sysdeps/i386/fpu/libm-test-ulps
sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
sysdeps/ieee754/flt-32/s_cospif.c [new file with mode: 0644]
sysdeps/loongarch/lp64/libm-test-ulps
sysdeps/mips/mips64/libm-test-ulps
sysdeps/or1k/fpu/libm-test-ulps
sysdeps/or1k/nofpu/libm-test-ulps
sysdeps/powerpc/fpu/libm-test-ulps
sysdeps/riscv/nofpu/libm-test-ulps
sysdeps/riscv/rvd/libm-test-ulps
sysdeps/s390/fpu/libm-test-ulps
sysdeps/sparc/fpu/libm-test-ulps
sysdeps/x86_64/fpu/libm-test-ulps

index 5702a2d1c33c89c6afe5a29dfbe314fcf2978c31..3ce38d154293dfb1f1a7adeba1388c3b7dc6cfa1 100644 (file)
@@ -350,3 +350,7 @@ sysdeps/ieee754/flt-32/s_atanpif.c:
   (src/binary32/atanpi/atanpif.c in CORE-MATH)
   - the code was adapted to use glibc code style and internal
     functions to handle errno, overflow, and underflow.
+sysdeps/ieee754/flt-32/s_cospif.c:
+  (src/binary32/cospi/cospif.c in CORE-MATH)
+  - the code was adapted to use glibc code style and internal
+    functions to handle errno, overflow, and underflow.
index 10f182a2118a9e0a725b94566ef2d320e8e3ce44..a15f3effa0559fce47f7c5cd2396272c1f2f90fa 100644 (file)
@@ -782,7 +782,6 @@ ldouble: 3
 
 Function: "cospi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "cospi_advsimd":
@@ -791,7 +790,6 @@ float: 1
 
 Function: "cospi_downward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: "cospi_sve":
@@ -800,12 +798,10 @@ float: 1
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cospi_upward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: Real part of "cpow":
index 7fb407cecddd5ca1b8df77084567744da0500231..f2acbf453e899a51a949370f0d27e4b9d6b35f22 100644 (file)
@@ -553,19 +553,15 @@ double: 3
 
 Function: "cospi":
 double: 2
-float: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 
 Function: "cospi_upward":
 double: 1
-float: 2
 
 Function: Real part of "cpow":
 double: 9
index 1859c2bd4fec01bace388119028cf244b64a6387..8716e5d29ee22fb8b6600bcd8b6dba64ad005922 100644 (file)
@@ -134,7 +134,6 @@ double: 2
 
 Function: "cospi":
 double: 2
-float: 2
 
 Function: Real part of "cpow":
 double: 2
index fa9864adee2e9a9a95d2e958a105f2eaea6fed4b..647f92944c58b0a3b03752806c02af7898f17a2b 100644 (file)
@@ -545,19 +545,15 @@ double: 2
 
 Function: "cospi":
 double: 2
-float: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 
 Function: "cospi_upward":
 double: 1
-float: 2
 
 Function: Real part of "cpow":
 double: 2
index a59f61fc4ebb4fdaef6e0dcfac80671f35086096..88f7701c0e541e69706d86dba3b282ee77a55169 100644 (file)
@@ -555,19 +555,15 @@ double: 2
 
 Function: "cospi":
 double: 2
-float: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 
 Function: "cospi_upward":
 double: 1
-float: 2
 
 Function: Real part of "cpow":
 double: 2
index 8aa52f46007a01657d0e6c28b65b1a472cc36886..39066956b03ec1ce18419c7916534514b969b870 100644 (file)
@@ -854,25 +854,21 @@ ldouble: 3
 
 Function: "cospi":
 double: 2
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 float128: 2
 ldouble: 2
 
 Function: "cospi_upward":
 double: 1
-float: 2
 float128: 2
 ldouble: 2
 
index 80326368088f71db080c9377e104c6206e6c2367..a8c4723850bebf865e963ca9f81e0a3ed37ce806 100644 (file)
@@ -854,25 +854,21 @@ ldouble: 3
 
 Function: "cospi":
 double: 2
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 float128: 2
 ldouble: 2
 
 Function: "cospi_upward":
 double: 1
-float: 2
 float128: 2
 ldouble: 2
 
diff --git a/sysdeps/ieee754/flt-32/s_cospif.c b/sysdeps/ieee754/flt-32/s_cospif.c
new file mode 100644 (file)
index 0000000..d4c652f
--- /dev/null
@@ -0,0 +1,136 @@
+/* Correctly-rounded cosine of binary32 value for angles in half-revolutions
+
+Copyright (c) 2022-2025 Alexei Sibidanov.
+
+The original version of this file was copied from the CORE-MATH
+project (src/binary32/cospi/cospif.c, revision f786e13).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+*/
+
+#include <math.h>
+#include <stdint.h>
+#include <libm-alias-float.h>
+#include "math_config.h"
+
+float
+__cospif (float x) /* Returns cos (pi * x), i.e. x is an angle in half-revolutions; the sign of x is ignored below (cos is even).  */
+{
+  static const double sn[] = /* Coefficients of fs, a polynomial in z^2; sn[0] is close to pi * 2^-38, so z * fs presumably approximates sin (pi * z * 2^-38).  */
+    {
+      0x1.921fb54442d0fp-37, -0x1.4abbce6102b94p-112, 0x1.4669fa3c58463p-189
+    };
+  static const double cn[] = /* Coefficients of fc, a polynomial in z^2; z^2 * fc presumably approximates cos (pi * z * 2^-38) - 1.  */
+    {
+      -0x1.3bd3cc9be45cfp-74, 0x1.03c1f08088742p-150, -0x1.55d1e5eff55a5p-228
+    };
+  /* S[i] approximates sin(i*pi/2^6) for i = 0..127 (one full period); the cosine is read as S[(i + 32) & 127].  */
+  static const double S[] =
+    {
+       0x0p+0,                 0x1.91f65f10dd814p-5,    0x1.917a6bc29b42cp-4,
+       0x1.2c8106e8e613ap-3,   0x1.8f8b83c69a60bp-3,    0x1.f19f97b215f1bp-3,
+       0x1.294062ed59f06p-2,   0x1.58f9a75ab1fddp-2,    0x1.87de2a6aea963p-2,
+       0x1.b5d1009e15ccp-2,    0x1.e2b5d3806f63bp-2,    0x1.073879922ffeep-1,
+       0x1.1c73b39ae68c8p-1,   0x1.30ff7fce17035p-1,    0x1.44cf325091dd6p-1,
+       0x1.57d69348cecap-1,    0x1.6a09e667f3bcdp-1,    0x1.7b5df226aafafp-1,
+       0x1.8bc806b151741p-1,   0x1.9b3e047f38741p-1,    0x1.a9b66290ea1a3p-1,
+       0x1.b728345196e3ep-1,   0x1.c38b2f180bdb1p-1,    0x1.ced7af43cc773p-1,
+       0x1.d906bcf328d46p-1,   0x1.e212104f686e5p-1,    0x1.e9f4156c62ddap-1,
+       0x1.f0a7efb9230d7p-1,   0x1.f6297cff75cbp-1,     0x1.fa7557f08a517p-1,
+       0x1.fd88da3d12526p-1,   0x1.ff621e3796d7ep-1,    0x1p+0,
+       0x1.ff621e3796d7ep-1,   0x1.fd88da3d12526p-1,    0x1.fa7557f08a517p-1,
+       0x1.f6297cff75cbp-1,    0x1.f0a7efb9230d7p-1,    0x1.e9f4156c62ddap-1,
+       0x1.e212104f686e5p-1,   0x1.d906bcf328d46p-1,    0x1.ced7af43cc773p-1,
+       0x1.c38b2f180bdb1p-1,   0x1.b728345196e3ep-1,    0x1.a9b66290ea1a3p-1,
+       0x1.9b3e047f38741p-1,   0x1.8bc806b151741p-1,    0x1.7b5df226aafafp-1,
+       0x1.6a09e667f3bcdp-1,   0x1.57d69348cecap-1,     0x1.44cf325091dd6p-1,
+       0x1.30ff7fce17035p-1,   0x1.1c73b39ae68c8p-1,    0x1.073879922ffeep-1,
+       0x1.e2b5d3806f63bp-2,   0x1.b5d1009e15ccp-2,     0x1.87de2a6aea963p-2,
+       0x1.58f9a75ab1fddp-2,   0x1.294062ed59f06p-2,    0x1.f19f97b215f1bp-3,
+       0x1.8f8b83c69a60bp-3,   0x1.2c8106e8e613ap-3,    0x1.917a6bc29b42cp-4,
+       0x1.91f65f10dd814p-5,   0x0p+0,                 -0x1.91f65f10dd814p-5,
+      -0x1.917a6bc29b42cp-4,  -0x1.2c8106e8e613ap-3,   -0x1.8f8b83c69a60bp-3,
+      -0x1.f19f97b215f1bp-3,  -0x1.294062ed59f06p-2,   -0x1.58f9a75ab1fddp-2,
+      -0x1.87de2a6aea963p-2,  -0x1.b5d1009e15ccp-2,    -0x1.e2b5d3806f63bp-2,
+      -0x1.073879922ffeep-1,  -0x1.1c73b39ae68c8p-1,   -0x1.30ff7fce17035p-1,
+      -0x1.44cf325091dd6p-1,  -0x1.57d69348cecap-1,    -0x1.6a09e667f3bcdp-1,
+      -0x1.7b5df226aafafp-1,  -0x1.8bc806b151741p-1,   -0x1.9b3e047f38741p-1,
+      -0x1.a9b66290ea1a3p-1,  -0x1.b728345196e3ep-1,   -0x1.c38b2f180bdb1p-1,
+      -0x1.ced7af43cc773p-1,  -0x1.d906bcf328d46p-1,   -0x1.e212104f686e5p-1,
+      -0x1.e9f4156c62ddap-1,  -0x1.f0a7efb9230d7p-1,   -0x1.f6297cff75cbp-1,
+      -0x1.fa7557f08a517p-1,  -0x1.fd88da3d12526p-1,   -0x1.ff621e3796d7ep-1,
+      -0x1p+0,                -0x1.ff621e3796d7ep-1,   -0x1.fd88da3d12526p-1,
+      -0x1.fa7557f08a517p-1,  -0x1.f6297cff75cbp-1,    -0x1.f0a7efb9230d7p-1,
+      -0x1.e9f4156c62ddap-1,  -0x1.e212104f686e5p-1,   -0x1.d906bcf328d46p-1,
+      -0x1.ced7af43cc773p-1,  -0x1.c38b2f180bdb1p-1,   -0x1.b728345196e3ep-1,
+      -0x1.a9b66290ea1a3p-1,  -0x1.9b3e047f38741p-1,   -0x1.8bc806b151741p-1,
+      -0x1.7b5df226aafafp-1,  -0x1.6a09e667f3bcdp-1,   -0x1.57d69348cecap-1,
+      -0x1.44cf325091dd6p-1,  -0x1.30ff7fce17035p-1,   -0x1.1c73b39ae68c8p-1,
+      -0x1.073879922ffeep-1,  -0x1.e2b5d3806f63bp-2,   -0x1.b5d1009e15ccp-2,
+      -0x1.87de2a6aea963p-2,  -0x1.58f9a75ab1fddp-2,   -0x1.294062ed59f06p-2,
+      -0x1.f19f97b215f1bp-3,  -0x1.8f8b83c69a60bp-3,   -0x1.2c8106e8e613ap-3,
+      -0x1.917a6bc29b42cp-4,  -0x1.91f65f10dd814p-5
+    };
+
+  uint32_t ix = asuint (x); /* Bit pattern of x; asuint is not defined in this file (presumably provided by math_config.h).  */
+  int32_t e = (ix >> 23) & 0xff; /* Biased exponent field (bits 23..30).  */
+  if (__glibc_unlikely (e == 0xff)) /* Exponent field all ones: infinity or NaN.  */
+    {
+      if (!(ix << 9)) /* Mantissa field is zero, so x is +/-infinity.  */
+        return __math_invalidf (x);
+      return x + x; /* nan */
+    }
+  int32_t m = (ix & ~0u >> 9) | 1 << 23; /* 24-bit significand: the 23 mantissa bits with bit 23 (the implicit one) set; sign and exponent are masked off.  */
+  int32_t s = 143 - e; /* Right-shift count for iq in the general path; equals 31 - p.  */
+  int32_t p = e - 112; /* Left-shift count; for normal x, |x| = m * 2^(p - 38).  */
+  if (__glibc_unlikely (p < 0)) /* |x| < 2^-15  */
+    {
+      uint32_t ax = ix & (~0u>>1); /* Bit pattern of |x| (sign bit cleared).  */
+      /* Warning: -0x1.3bd3ccp+2f * x underflows for |x| < 0x1.9f03p-129 */
+      if (ax >= 0x19f030u)
+       return fmaf (-0x1.3bd3ccp+2f * x, x, 1.0f);
+      else /* |x| < 0x1.9f03p-129 */
+       return fmaf (-x, x, 1.0f);
+    }
+  if (__glibc_unlikely (p > 31)) /* p - 32 >= 0: 64 * |x| = m * 2^(p - 32) is an integer, so the result is read straight from S.  */
+    {
+      if (__glibc_unlikely (p > 63)) /* 64 * |x| is a multiple of 2^32, hence of 128: the index would be 32 and S[32] is 1.  */
+       return 1.0f;
+      int32_t iq = m << (p - 32); /* 64 * |x| modulo 2^32; only its low 7 bits are used.  */
+      return S[(iq + 32) & 127]; /* cos (pi * x) = sin (pi * x + pi / 2): offset the sine index by 32.  */
+    }
+  int32_t k = m << p; /* Low 32 bits of |x| * 2^38, read as signed: the part of |x| below 2^-6.  NOTE(review): left shift of a signed int32_t relies on wrap-around here -- confirm this is intended.  */
+  if (__glibc_unlikely (k == 0)) /* No bits below 2^-6: |x| is a multiple of 1/64, exact table lookup.  */
+    {
+      int32_t iq = m >> (32 - p); /* 64 * |x| (p cannot be 0 here, since then k = m would be nonzero).  */
+      return S[(iq + 32) & 127];
+    }
+  double z = k; /* Signed offset of |x| from the nearest multiple of 1/64, in units of 2^-38.  */
+  double z2 = z * z;
+  double fs = sn[0] + z2 * (sn[1] + z2 * sn[2]);
+  double fc = cn[0] + z2 * (cn[1] + z2 * cn[2]);
+  uint32_t iq = m >> s; /* floor (128 * |x|), since s = 31 - p.  */
+  iq = (iq + 1) >> 1; /* Round to the nearest multiple of 1/64 (in units of 1/64).  */
+  uint32_t is = iq & 127, ic = (iq + 32) & 127; /* Table indices for the sine (is) and cosine (ic) of iq * pi / 64.  */
+  double ts = S[ic], tc = S[is]; /* Note the naming: ts holds the cosine entry S[ic], tc holds the sine entry S[is].  */
+  double r = ts + (ts * z2) * fc - (tc * z) * fs; /* Angle addition: cos (a + b) = cos a * (1 + z2 * fc) - sin a * (z * fs).  */
+  return r; /* Implicit double -> float conversion.  */
+}
+libm_alias_float (__cospi, cospi)
index 0cac55cbe41bef92f9f2cdd5b4cb9143bf9b370d..fe84c609133fb07fbaabb4c5817c9600438bacc8 100644 (file)
@@ -701,22 +701,18 @@ ldouble: 3
 
 Function: "cospi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cospi_upward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: Real part of "cpow":
index 1b5bcff11e14ca57a769d748ed507fe49b2ccea5..ddc78d023973f6975995ab0b756d450996d315aa 100644 (file)
@@ -701,22 +701,18 @@ ldouble: 3
 
 Function: "cospi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cospi_upward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: Real part of "cpow":
index a608e3c949af10ba92268dbd402434f4b89e7a78..884b4cc361c9faed95d4217c58cdaa8e42198c5f 100644 (file)
@@ -545,19 +545,15 @@ double: 2
 
 Function: "cospi":
 double: 2
-float: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 
 Function: "cospi_upward":
 double: 1
-float: 2
 
 Function: Real part of "cpow":
 double: 2
index 56986f0be02744ecedea8c60c8bf81f719a457e4..aec66e0fa3479f106ce9d7e425f6010ccfe9ca0a 100644 (file)
@@ -509,7 +509,6 @@ double: 2
 
 Function: "cospi":
 double: 2
-float: 2
 
 Function: Real part of "cpow":
 double: 2
index 630111e6c45a468c792fc5326d359addf5db6582..bdf0c98dc7aac8fce99c5562767d71cabc5b62a1 100644 (file)
@@ -858,25 +858,21 @@ ldouble: 2
 
 Function: "cospi":
 double: 2
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 float128: 2
 ldouble: 4
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 float128: 2
 ldouble: 6
 
 Function: "cospi_upward":
 double: 1
-float: 2
 float128: 2
 ldouble: 6
 
index 087dcd79fc849b471623ce182b89856f9e15bcb5..08af2495f35547f444fdd83456eb9cadea20aaf3 100644 (file)
@@ -650,7 +650,6 @@ ldouble: 3
 
 Function: "cospi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: Real part of "cpow":
index efd83affa498f690788a2da1dc944ffc188284fa..6644e38ebc2bbe3dbeaa89fb85fabaa57ee9fda4 100644 (file)
@@ -709,22 +709,18 @@ ldouble: 3
 
 Function: "cospi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cospi_upward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: Real part of "cpow":
index 709debb205bb5b8fd15e833637324efd8192fa57..6318760eb5f56232010a01bbe6341e77604de994 100644 (file)
@@ -701,22 +701,18 @@ ldouble: 3
 
 Function: "cospi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cospi_upward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: Real part of "cpow":
index becf5da3d61529a97c7f8c3f63d34e72f62279ac..2c319f8ae2d84d706e31053c9d0c3add19b13e2f 100644 (file)
@@ -701,22 +701,18 @@ ldouble: 3
 
 Function: "cospi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "cospi_upward":
 double: 1
-float: 2
 ldouble: 2
 
 Function: Real part of "cpow":
index 8c5d4fd4710d4f3516d9507417b72e9823021bfc..e2cf3e04b6f4636d249aae8ef81dbbf9b5e68af6 100644 (file)
@@ -1050,25 +1050,21 @@ float: 2
 
 Function: "cospi":
 double: 2
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "cospi_downward":
 double: 1
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "cospi_towardzero":
 double: 1
-float: 1
 float128: 2
 ldouble: 2
 
 Function: "cospi_upward":
 double: 1
-float: 2
 float128: 2
 ldouble: 2