From: Adhemerval Zanella Date: Thu, 2 Oct 2025 11:55:46 +0000 (-0300) Subject: math: Optimize dbl-64 remainder implementation X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f0facb2d27e50bbf2a97f17541863c6025d23bf0;p=thirdparty%2Fglibc.git math: Optimize dbl-64 remainder implementation The commit 34b9f8bc17 provides an optimized fmod implementation; use the same strategy used for remainderf and implement the double variant on top of fmod. I see the following performance improvements using remainder benchtests (using reciprocal-throughput metric): Architecture | Input | master | patch | Improvement -----------------|-----------------|----------|----------|------------ x86_64 | subnormals | 76.1345 | 21.5334 | 71.72% x86_64 | normal | 553.2670 | 426.5670 | 22.90% x86_64 | close-exponent | 30.5111 | 22.6893 | 25.64% aarch64 | subnormals | 26.0734 | 8.4876 | 67.45% aarch64 | normal | 205.2590 | 200.082 | 2.52% aarch64 | close-exponent | 13.8481 | 13.6663 | 1.31% The aarch64 machine used was a Neoverse-N1, gcc 15.1.1; while the x86_64 was an AMD Ryzen 9 5900X, gcc 15.2.1. This implementation also fixes the math/test-double-remainder issues on alpha. Tested on aarch64-linux-gnu and x86_64-linux-gnu. Reviewed-by: Wilco Dijkstra --- diff --git a/sysdeps/ieee754/dbl-64/e_remainder.c b/sysdeps/ieee754/dbl-64/e_remainder.c index 04a30e2138..dbae3aab81 100644 --- a/sysdeps/ieee754/dbl-64/e_remainder.c +++ b/sysdeps/ieee754/dbl-64/e_remainder.c @@ -1,153 +1,73 @@ -/* - * IBM Accurate Mathematical Library - * written by International Business Machines Corp. - * Copyright (C) 2001-2025 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <https://www.gnu.org/licenses/>. - */ -/**************************************************************************/ -/* MODULE_NAME urem.c */ -/* */ -/* FUNCTION: uremainder */ -/* */ -/* An ultimate remainder routine. Given two IEEE double machine numbers x */ -/* ,y it computes the correctly rounded (to nearest) value of remainder */ -/* of dividing x by y. */ -/* Assumption: Machine arithmetic operations are performed in */ -/* round to nearest mode of IEEE 754 standard. */ -/* */ -/* ************************************************************************/ +/* Remainder function, double version. + Copyright (C) 2008-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ -#include "endian.h" -#include "mydefs.h" -#include "urem.h" #include -#include -#include #include +#include "math_config.h" -/**************************************************************************/ -/* An ultimate remainder routine. 
Given two IEEE double machine numbers x */ -/* ,y it computes the correctly rounded (to nearest) value of remainder */ -/**************************************************************************/ double __ieee754_remainder (double x, double y) { - double z, d, xx; - int4 kx, ky, n, nn, n1, m1, l; - mynumber u, t, w = { { 0, 0 } }, v = { { 0, 0 } }, ww = { { 0, 0 } }, r; - u.x = x; - t.x = y; - kx = u.i[HIGH_HALF] & 0x7fffffff; /* no sign for x*/ - t.i[HIGH_HALF] &= 0x7fffffff; /*no sign for y */ - ky = t.i[HIGH_HALF]; - /*------ |x| < 2^1023 and 2^-970 < |y| < 2^1024 ------------------*/ - if (kx < 0x7fe00000 && ky < 0x7ff00000 && ky >= 0x03500000) + uint64_t hx = asuint64 (x); + uint64_t hy = asuint64 (y); + uint64_t sx = hx >> 63; + + hx &= ~SIGN_MASK; + hy &= ~SIGN_MASK; + + /* |y| < DBL_MAX / 2 ? */ + y = fabs (y); + if (__glibc_likely (hy < UINT64_C (0x7fe0000000000000))) { - SET_RESTORE_ROUND_NOEX (FE_TONEAREST); - if (kx + 0x00100000 < ky) - return x; - if ((kx - 0x01500000) < ky) - { - z = x / t.x; - v.i[HIGH_HALF] = t.i[HIGH_HALF]; - d = (z + big.x) - big.x; - xx = (x - d * v.x) - d * (t.x - v.x); - if (d - z != 0.5 && d - z != -0.5) - return (xx != 0) ? xx : ((x > 0) ? ZERO.x : nZERO.x); - else - { - if (fabs (xx) > 0.5 * t.x) - return (z > d) ? xx - t.x : xx + t.x; - else - return xx; - } - } /* (kx<(ky+0x01500000)) */ - else + /* |x| not finite, |y| equal 0 is handled by fmod. */ + if (__glibc_unlikely (hx >= EXPONENT_MASK)) + return (x * y) / (x * y); + + x = fabs (__ieee754_fmod (x, y + y)); + if (x + x > y) { - r.x = 1.0 / t.x; - n = t.i[HIGH_HALF]; - nn = (n & 0x7ff00000) + 0x01400000; - w.i[HIGH_HALF] = n; - ww.x = t.x - w.x; - l = (kx - nn) & 0xfff00000; - n1 = ww.i[HIGH_HALF]; - m1 = r.i[HIGH_HALF]; - while (l > 0) - { - r.i[HIGH_HALF] = m1 - l; - z = u.x * r.x; - w.i[HIGH_HALF] = n + l; - ww.i[HIGH_HALF] = (n1) ? 
n1 + l : n1; - d = (z + big.x) - big.x; - u.x = (u.x - d * w.x) - d * ww.x; - l = (u.i[HIGH_HALF] & 0x7ff00000) - nn; - } - r.i[HIGH_HALF] = m1; - w.i[HIGH_HALF] = n; - ww.i[HIGH_HALF] = n1; - z = u.x * r.x; - d = (z + big.x) - big.x; - u.x = (u.x - d * w.x) - d * ww.x; - if (fabs (u.x) < 0.5 * t.x) - return (u.x != 0) ? u.x : ((x > 0) ? ZERO.x : nZERO.x); - else - if (fabs (u.x) > 0.5 * t.x) - return (d > z) ? u.x + t.x : u.x - t.x; - else - { - z = u.x / t.x; d = (z + big.x) - big.x; - return ((u.x - d * w.x) - d * ww.x); - } + x -= y; + if (x + x >= y) + x -= y; + /* Make sure x is not -0. This can occur only when x = y + and rounding direction is towards negative infinity. */ + else if (x == 0.0) + x = 0.0; } - } /* (kx<0x7fe00000&&ky<0x7ff00000&&ky>=0x03500000) */ + } else { - if (kx < 0x7fe00000 && ky < 0x7ff00000 && (ky > 0 || t.i[LOW_HALF] != 0)) - { - y = fabs (y) * t128.x; - z = __ieee754_remainder (x, y) * t128.x; - z = __ieee754_remainder (z, y) * tm128.x; - return z; - } - else + /* |x| not finite or |y| is NaN or 0 */ + if ((hx >= EXPONENT_MASK || (hy - 1) >= EXPONENT_MASK)) + return (x * y) / (x * y); + + x = fabs (x); + double y_half = y * 0.5; + if (x > y_half) { - if ((kx & 0x7ff00000) == 0x7fe00000 && ky < 0x7ff00000 && - (ky > 0 || t.i[LOW_HALF] != 0)) - { - y = fabs (y); - z = 2.0 * __ieee754_remainder (0.5 * x, y); - d = fabs (z); - if (d <= fabs (d - y)) - return z; - else if (d == y) - return 0.0 * x; - else - return (z > 0) ? z - y : z + y; - } - else /* if x is too big */ - { - if (ky == 0 && t.i[LOW_HALF] == 0) /* y = 0 */ - return (x * y) / (x * y); - else if (kx >= 0x7ff00000 /* x not finite */ - || (ky > 0x7ff00000 /* y is NaN */ - || (ky == 0x7ff00000 && t.i[LOW_HALF] != 0))) - return (x * y) / (x * y); - else - return x; - } + x -= y; + if (x >= y_half) + x -= y; + else if (x == 0.0) + x = 0.0; } } + + return sx ? 
-x : x; } libm_alias_finite (__ieee754_remainder, __remainder) diff --git a/sysdeps/ieee754/dbl-64/urem.h b/sysdeps/ieee754/dbl-64/urem.h deleted file mode 100644 index 38e1fc3200..0000000000 --- a/sysdeps/ieee754/dbl-64/urem.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * IBM Accurate Mathematical Library - * Copyright (C) 2001-2025 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see . - */ - -/************************************************************************/ -/* MODULE_NAME: urem.h */ -/* */ -/* */ -/* common data and variables definition for BIG or LITTLE ENDIAN */ -/************************************************************************/ - -#ifndef UREM_H -#define UREM_H - -#ifdef BIG_ENDI -static const mynumber big = {{0x43380000, 0}}, /* 6755399441055744 */ - t128 = {{0x47f00000, 0}}, /* 2^ 128 */ - tm128 = {{0x37f00000, 0}}, /* 2^-128 */ - ZERO = {{0, 0}}, /* 0.0 */ - nZERO = {{0x80000000, 0}}; /* -0.0 */ -#else -#ifdef LITTLE_ENDI -static const mynumber big = {{0, 0x43380000}}, /* 6755399441055744 */ - t128 = {{0, 0x47f00000}}, /* 2^ 128 */ - tm128 = {{0, 0x37f00000}}, /* 2^-128 */ - ZERO = {{0, 0}}, /* 0.0 */ - nZERO = {{0, 0x80000000}}; /* -0.0 */ -#endif -#endif - -#endif