]> git.ipfire.org Git - thirdparty/glibc.git/commitdiff
math: Use sinpif from CORE-MATH
author Adhemerval Zanella <adhemerval.zanella@linaro.org>
Fri, 20 Dec 2024 16:31:56 +0000 (13:31 -0300)
committer Adhemerval Zanella <adhemerval.zanella@linaro.org>
Wed, 12 Feb 2025 19:31:57 +0000 (16:31 -0300)
The CORE-MATH implementation is correctly rounded (for any rounding mode)
and shows better performance than the generic sinpif.

The code was adapted to glibc style and to use the definitions from
math_config.h (to handle errno, overflow, and underflow).

Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (Neoverse-N1,
gcc 13.3.1), and powerpc (POWER10, gcc 13.2.1):

latency                      master        patched   improvement
x86_64                      47.5710        38.4455        19.18%
x86_64v2                    46.8828        40.7563        13.07%
x86_64v3                    44.0034        34.1497        22.39%
aarch64 (Neoverse)          19.2493        14.1968        26.25%
power8                      23.5312        16.3854        30.37%
power10                     22.6485        10.2888        54.57%

reciprocal-throughput        master        patched   improvement
x86_64                      21.8858        11.6717        46.67%
x86_64v2                    22.0620        11.9853        45.67%
x86_64v3                    21.5653        11.3291        47.47%
aarch64 (Neoverse)          13.0615         6.5499        49.85%
power8                      16.2030         6.9580        57.06%
power10                     12.8911         4.2858        66.75%

Reviewed-by: DJ Delorie <dj@redhat.com>
19 files changed:
SHARED-FILES
sysdeps/aarch64/libm-test-ulps
sysdeps/arc/fpu/libm-test-ulps
sysdeps/arc/nofpu/libm-test-ulps
sysdeps/arm/libm-test-ulps
sysdeps/hppa/fpu/libm-test-ulps
sysdeps/i386/fpu/libm-test-ulps
sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
sysdeps/ieee754/flt-32/s_sinpif.c [new file with mode: 0644]
sysdeps/loongarch/lp64/libm-test-ulps
sysdeps/mips/mips64/libm-test-ulps
sysdeps/or1k/fpu/libm-test-ulps
sysdeps/or1k/nofpu/libm-test-ulps
sysdeps/powerpc/fpu/libm-test-ulps
sysdeps/riscv/nofpu/libm-test-ulps
sysdeps/riscv/rvd/libm-test-ulps
sysdeps/s390/fpu/libm-test-ulps
sysdeps/sparc/fpu/libm-test-ulps
sysdeps/x86_64/fpu/libm-test-ulps

index 3ce38d154293dfb1f1a7adeba1388c3b7dc6cfa1..c108f3b308d855c493b6ceb89066203fbc0a6124 100644 (file)
@@ -354,3 +354,7 @@ sysdeps/ieee754/flt-32/s_cospif.c:
   (src/binary32/cospi/cospif.c in CORE-MATH)
   - the code was adapted to use glibc code style and internal
     functions to handle errno, overflow, and underflow.
+sysdeps/ieee754/flt-32/s_sinpif.c:
+  (src/binary32/sinpi/sinpif.c in CORE-MATH)
+  - the code was adapted to use glibc code style and internal
+    functions to handle errno, overflow, and underflow.
index a15f3effa0559fce47f7c5cd2396272c1f2f90fa..c6c93aa0e43c0445530580b427a969d0b733a4e9 100644 (file)
@@ -1610,7 +1610,6 @@ ldouble: 4
 
 Function: "sinpi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_advsimd":
@@ -1619,7 +1618,6 @@ float: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_sve":
@@ -1628,12 +1626,10 @@ float: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 ldouble: 2
 
 Function: "tan":
index f2acbf453e899a51a949370f0d27e4b9d6b35f22..65ebf6f9a0fe33e5f5e0dc02846aea692c401482 100644 (file)
@@ -1101,19 +1101,15 @@ double: 3
 
 Function: "sinpi":
 double: 2
-float: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 
 Function: "tan":
 double: 1
index 8716e5d29ee22fb8b6600bcd8b6dba64ad005922..3ba4f01cbffa9f647b2de5d37b105080af0f9131 100644 (file)
@@ -265,7 +265,6 @@ double: 2
 
 Function: "sinpi":
 double: 2
-float: 2
 
 Function: "tanh":
 double: 2
index 647f92944c58b0a3b03752806c02af7898f17a2b..f887712d8e2a920914854595b276795443440bf6 100644 (file)
@@ -1097,19 +1097,15 @@ double: 3
 
 Function: "sinpi":
 double: 2
-float: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 
 Function: "tan_downward":
 double: 1
index 88f7701c0e541e69706d86dba3b282ee77a55169..10f7f2ebdea4f9d54cc0c24d909904570b48e2c7 100644 (file)
@@ -1123,19 +1123,15 @@ double: 3
 
 Function: "sinpi":
 double: 2
-float: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 
 Function: "tan":
 double: 1
index 39066956b03ec1ce18419c7916534514b969b870..77aa7155dbe16dfcef5fd3a3eecc44a53d47cfe4 100644 (file)
@@ -1691,25 +1691,21 @@ ldouble: 5
 
 Function: "sinpi":
 double: 2
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 float128: 2
 ldouble: 2
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 float128: 2
 ldouble: 2
 
index a8c4723850bebf865e963ca9f81e0a3ed37ce806..7168d577d89e7d0c64accb32f6ddc8791ca2242a 100644 (file)
@@ -1696,25 +1696,21 @@ ldouble: 5
 
 Function: "sinpi":
 double: 2
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 float128: 2
 ldouble: 3
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 float128: 2
 ldouble: 2
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 float128: 2
 ldouble: 2
 
diff --git a/sysdeps/ieee754/flt-32/s_sinpif.c b/sysdeps/ieee754/flt-32/s_sinpif.c
new file mode 100644 (file)
index 0000000..772ea42
--- /dev/null
@@ -0,0 +1,134 @@
+/* Correctly-rounded sine of binary32 value for angles in half-revolutions
+
+Copyright (c) 2022-2025 Alexei Sibidanov.
+
+The original version of this file was copied from the CORE-MATH
+project (src/binary32/sinpi/sinpif.c, revision f786e13).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include <math.h>
+#include <stdint.h>
+#include <errno.h>
+#include <fenv.h>
+#include <libm-alias-float.h>
+#include "math_config.h"
+
+float
+__sinpif (float x) /* Returns sin (pi * x), evaluated in double.  */
+{
+  static const double sn[] = /* In z^2; poly * z ~ sin (pi * z * 2^-38).  */
+    {
+      0x1.921fb54442d0fp-37, -0x1.4abbce6102b94p-112, 0x1.4669fa3c58463p-189
+    };
+  static const double cn[] = /* z^2 * poly ~ cos (pi * z * 2^-38) - 1.  */
+    {
+      -0x1.3bd3cc9be45cfp-74, 0x1.03c1f08088742p-150, -0x1.55d1e5eff55a5p-228
+    };
+  static const double S[] = /* S[i] = sin (pi * i / 64), i = 0 .. 127.  */
+    {
+       0x0p+0,                 0x1.91f65f10dd814p-5,    0x1.917a6bc29b42cp-4,
+       0x1.2c8106e8e613ap-3,   0x1.8f8b83c69a60bp-3,    0x1.f19f97b215f1bp-3,
+       0x1.294062ed59f06p-2,   0x1.58f9a75ab1fddp-2,    0x1.87de2a6aea963p-2,
+       0x1.b5d1009e15ccp-2,    0x1.e2b5d3806f63bp-2,    0x1.073879922ffeep-1,
+       0x1.1c73b39ae68c8p-1,   0x1.30ff7fce17035p-1,    0x1.44cf325091dd6p-1,
+       0x1.57d69348cecap-1,    0x1.6a09e667f3bcdp-1,    0x1.7b5df226aafafp-1,
+       0x1.8bc806b151741p-1,   0x1.9b3e047f38741p-1,    0x1.a9b66290ea1a3p-1,
+       0x1.b728345196e3ep-1,   0x1.c38b2f180bdb1p-1,    0x1.ced7af43cc773p-1,
+       0x1.d906bcf328d46p-1,   0x1.e212104f686e5p-1,    0x1.e9f4156c62ddap-1,
+       0x1.f0a7efb9230d7p-1,   0x1.f6297cff75cbp-1,     0x1.fa7557f08a517p-1,
+       0x1.fd88da3d12526p-1,   0x1.ff621e3796d7ep-1,    0x1p+0,
+       0x1.ff621e3796d7ep-1,   0x1.fd88da3d12526p-1,    0x1.fa7557f08a517p-1,
+       0x1.f6297cff75cbp-1,    0x1.f0a7efb9230d7p-1,    0x1.e9f4156c62ddap-1,
+       0x1.e212104f686e5p-1,   0x1.d906bcf328d46p-1,    0x1.ced7af43cc773p-1,
+       0x1.c38b2f180bdb1p-1,   0x1.b728345196e3ep-1,    0x1.a9b66290ea1a3p-1,
+       0x1.9b3e047f38741p-1,   0x1.8bc806b151741p-1,    0x1.7b5df226aafafp-1,
+       0x1.6a09e667f3bcdp-1,   0x1.57d69348cecap-1,     0x1.44cf325091dd6p-1,
+       0x1.30ff7fce17035p-1,   0x1.1c73b39ae68c8p-1,    0x1.073879922ffeep-1,
+       0x1.e2b5d3806f63bp-2,   0x1.b5d1009e15ccp-2,     0x1.87de2a6aea963p-2,
+       0x1.58f9a75ab1fddp-2,   0x1.294062ed59f06p-2,    0x1.f19f97b215f1bp-3,
+       0x1.8f8b83c69a60bp-3,   0x1.2c8106e8e613ap-3,    0x1.917a6bc29b42cp-4,
+       0x1.91f65f10dd814p-5,   0x0p+0,                 -0x1.91f65f10dd814p-5,
+      -0x1.917a6bc29b42cp-4,  -0x1.2c8106e8e613ap-3,   -0x1.8f8b83c69a60bp-3,
+      -0x1.f19f97b215f1bp-3,  -0x1.294062ed59f06p-2,   -0x1.58f9a75ab1fddp-2,
+      -0x1.87de2a6aea963p-2,  -0x1.b5d1009e15ccp-2,    -0x1.e2b5d3806f63bp-2,
+      -0x1.073879922ffeep-1,  -0x1.1c73b39ae68c8p-1,   -0x1.30ff7fce17035p-1,
+      -0x1.44cf325091dd6p-1,  -0x1.57d69348cecap-1,    -0x1.6a09e667f3bcdp-1,
+      -0x1.7b5df226aafafp-1,  -0x1.8bc806b151741p-1,   -0x1.9b3e047f38741p-1,
+      -0x1.a9b66290ea1a3p-1,  -0x1.b728345196e3ep-1,   -0x1.c38b2f180bdb1p-1,
+      -0x1.ced7af43cc773p-1,  -0x1.d906bcf328d46p-1,   -0x1.e212104f686e5p-1,
+      -0x1.e9f4156c62ddap-1,  -0x1.f0a7efb9230d7p-1,   -0x1.f6297cff75cbp-1,
+      -0x1.fa7557f08a517p-1,  -0x1.fd88da3d12526p-1,   -0x1.ff621e3796d7ep-1,
+      -0x1p+0,                -0x1.ff621e3796d7ep-1,   -0x1.fd88da3d12526p-1,
+      -0x1.fa7557f08a517p-1,  -0x1.f6297cff75cbp-1,    -0x1.f0a7efb9230d7p-1,
+      -0x1.e9f4156c62ddap-1,  -0x1.e212104f686e5p-1,   -0x1.d906bcf328d46p-1,
+      -0x1.ced7af43cc773p-1,  -0x1.c38b2f180bdb1p-1,   -0x1.b728345196e3ep-1,
+      -0x1.a9b66290ea1a3p-1,  -0x1.9b3e047f38741p-1,   -0x1.8bc806b151741p-1,
+      -0x1.7b5df226aafafp-1,  -0x1.6a09e667f3bcdp-1,   -0x1.57d69348cecap-1,
+      -0x1.44cf325091dd6p-1,  -0x1.30ff7fce17035p-1,   -0x1.1c73b39ae68c8p-1,
+      -0x1.073879922ffeep-1,  -0x1.e2b5d3806f63bp-2,   -0x1.b5d1009e15ccp-2,
+      -0x1.87de2a6aea963p-2,  -0x1.58f9a75ab1fddp-2,   -0x1.294062ed59f06p-2,
+      -0x1.f19f97b215f1bp-3,  -0x1.8f8b83c69a60bp-3,   -0x1.2c8106e8e613ap-3,
+      -0x1.917a6bc29b42cp-4,  -0x1.91f65f10dd814p-5
+    };
+
+  uint32_t ix = asuint (x); /* Presumably the raw binary32 bits of x.  */
+  int32_t e = (ix >> 23) & 0xff; /* Biased exponent field.  */
+  if (__glibc_unlikely (e == 0xff)) /* x is Inf or NaN.  */
+    {
+      if (!(ix << 9)) /* Mantissa field is zero: x is +-Inf.  */
+        return __math_invalidf (x);
+      return x + x; /* NaN input: propagate a NaN.  */
+    }
+  int32_t m = (ix & ~0u >> 9) | 1 << 23, sgn = ix; /* Mantissa | bit 23.  */
+  sgn >>= 31; /* 0 or -1; relies on an arithmetic right shift.  */
+  m = (m ^ sgn) - sgn; /* Negate m when the sign bit of x is set.  */
+  int32_t s = 143 - e; /* For normal x: x = m * 2^-(s + 7).  */
+  if (__glibc_unlikely (s < 0)) /* |x| >= 2^17: x is a multiple of 1/64.  */
+    {
+      if (__glibc_unlikely (s < -6)) /* |x| >= 2^23: x is an integer.  */
+       return copysignf (0.0f, x);
+      int32_t iq = m << (-s - 1); /* iq = 64 * x.  */
+      iq &= 127; /* Reduce to one period of S (x modulo 2).  */
+      if (iq == 0 || iq == 64) /* x is an integer: zero with sign of x.  */
+       return copysignf (0.0f, x);
+      return S[iq];
+    }
+  else if (__glibc_unlikely (s > 30)) /* e < 113, i.e. |x| < 2^-14.  */
+    {
+      double z = x, z2 = z * z; /* Below: ~ pi * z - (pi^3 / 6) * z^3.  */
+      return z * (0x1.921fb54442d18p+1 + z2 * (-0x1.4abbce625be53p+2));
+    }
+  int32_t si = 25 - s;
+  if (__glibc_unlikely (si >= 0 && (m << si) == 0)) /* x is an integer.  */
+    return copysignf (0.0f, x);
+
+  int32_t k = m << (31 - s); /* Low s + 1 bits of m moved to the top.  */
+  double z = k, z2 = z * z; /* x = iq / 64 + z * 2^-38 (iq set below).  */
+  double fs = sn[0] + z2 * (sn[1] + z2 * sn[2]);
+  double fc = cn[0] + z2 * (cn[1] + z2 * cn[2]);
+  uint32_t iq = m >> s;
+  iq = (iq + 1) >> 1; /* iq = 64 * x rounded to nearest.  */
+  uint32_t is = iq & 127, ic = (iq + 32) & 127; /* +32: quarter period.  */
+  double ts = S[is], tc = S[ic]; /* sin and cos of pi * iq / 64.  */
+  double r = ts + (ts * z2) * fc + (tc * z) * fs; /* sin (a + b) formula.  */
+  return r; /* The double result is converted to float here.  */
+}
+libm_alias_float (__sinpi, sinpi)
index fe84c609133fb07fbaabb4c5817c9600438bacc8..4fadba43c25ad0edde7ae333a7ec0ba26ef75c85 100644 (file)
@@ -1389,22 +1389,18 @@ ldouble: 4
 
 Function: "sinpi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 ldouble: 2
 
 Function: "tan":
index ddc78d023973f6975995ab0b756d450996d315aa..5177b54557c6488125e9c70c548c323b13f07162 100644 (file)
@@ -1402,22 +1402,18 @@ ldouble: 4
 
 Function: "sinpi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 ldouble: 2
 
 Function: "tan":
index 884b4cc361c9faed95d4217c58cdaa8e42198c5f..1fb4ec57c0f339c6f276af11946ccdba58f9aca1 100644 (file)
@@ -1082,19 +1082,15 @@ double: 3
 
 Function: "sinpi":
 double: 2
-float: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 
 Function: "tan_downward":
 double: 1
index aec66e0fa3479f106ce9d7e425f6010ccfe9ca0a..aff11b5148988b112b0579af4fd3518d9a1c359f 100644 (file)
@@ -991,7 +991,6 @@ double: 3
 
 Function: "sinpi":
 double: 2
-float: 2
 
 Function: "tan_downward":
 double: 1
index bdf0c98dc7aac8fce99c5562767d71cabc5b62a1..e59c3e47ef61b334c8618b0418baedf525c4e911 100644 (file)
@@ -1779,25 +1779,21 @@ ldouble: 6
 
 Function: "sinpi":
 double: 2
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 float128: 2
 ldouble: 5
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 float128: 2
 ldouble: 8
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 float128: 2
 ldouble: 8
 
index 08af2495f35547f444fdd83456eb9cadea20aaf3..2545d0e1665d345d7d62ca4f4bc26d1fef587c4a 100644 (file)
@@ -1271,7 +1271,6 @@ ldouble: 4
 
 Function: "sinpi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "tan":
index 6644e38ebc2bbe3dbeaa89fb85fabaa57ee9fda4..94534a4f808114dfcadcb4d27f1be7eb09eb6d0b 100644 (file)
@@ -1401,22 +1401,18 @@ ldouble: 4
 
 Function: "sinpi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 ldouble: 2
 
 Function: "tan":
index 6318760eb5f56232010a01bbe6341e77604de994..2c89048b569e4d0e970966ae1b27be059fdfb14f 100644 (file)
@@ -1387,22 +1387,18 @@ ldouble: 4
 
 Function: "sinpi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 ldouble: 2
 
 Function: "tan":
index 2c319f8ae2d84d706e31053c9d0c3add19b13e2f..3af2355545122d729c81d0571cd20516acdb2400 100644 (file)
@@ -1402,22 +1402,18 @@ ldouble: 4
 
 Function: "sinpi":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 ldouble: 2
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 ldouble: 2
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 ldouble: 2
 
 Function: "tan":
index e2cf3e04b6f4636d249aae8ef81dbbf9b5e68af6..f6da5ba1864c9cf8bb12d861e78b135cba826b02 100644 (file)
@@ -2197,25 +2197,21 @@ float: 1
 
 Function: "sinpi":
 double: 2
-float: 2
 float128: 2
 ldouble: 2
 
 Function: "sinpi_downward":
 double: 2
-float: 2
 float128: 2
 ldouble: 3
 
 Function: "sinpi_towardzero":
 double: 2
-float: 1
 float128: 2
 ldouble: 2
 
 Function: "sinpi_upward":
 double: 2
-float: 3
 float128: 2
 ldouble: 2