From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu, 2 Oct 2025 11:55:46 +0000 (-0300)
Subject: math: Optimize dbl-64 remainder implementation
X-Git-Tag: glibc-2.43~466
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f0facb2d27e50bbf2a97f17541863c6025d23bf0;p=thirdparty%2Fglibc.git

math: Optimize dbl-64 remainder implementation

The commit 34b9f8bc17 provides an optimized fmod implementation; use
the same strategy used for remainderf and implement the double variant
on top of fmod.

I see the following performance improvements using remainder benchtests
(using reciprocal-throughput metric):

Architecture     | Input           |   master |   patch  | Improvement
-----------------|-----------------|----------|-----------------------
x86_64           | subnormals      |  76.1345 |  21.5334 |     71.72%
x86_64           | normal          | 553.2670 | 426.5670 |     22.90%
x86_64           | close-exponent  |  30.5111 |  22.6893 |     25.64%
aarch64          | subnormals      |  26.0734 |   8.4876 |     67.45%
aarch64          | normal          | 205.2590 |  200.082 |      2.52%
aarch64          | close-exponent  |  13.8481 |  13.6663 |      1.31%

The aarch64 machine used was a Neoverse-N1 with gcc 15.1.1, while the
x86_64 machine was an AMD Ryzen 9 5900X with gcc 15.2.1.

This implementation also fixes the math/test-double-remainder issues
on alpha.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.

Reviewed-by: Wilco Dijkstra  <Wilco.Dijkstra@arm.com>
---

diff --git a/sysdeps/ieee754/dbl-64/e_remainder.c b/sysdeps/ieee754/dbl-64/e_remainder.c
index 04a30e2138..dbae3aab81 100644
--- a/sysdeps/ieee754/dbl-64/e_remainder.c
+++ b/sysdeps/ieee754/dbl-64/e_remainder.c
@@ -1,153 +1,73 @@
-/*
- * IBM Accurate Mathematical Library
- * written by International Business Machines Corp.
- * Copyright (C) 2001-2025 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-/**************************************************************************/
-/*  MODULE_NAME urem.c                                                    */
-/*                                                                        */
-/*  FUNCTION: uremainder                                                  */
-/*                                                                        */
-/* An ultimate remainder routine. Given two IEEE double machine numbers x */
-/* ,y   it computes the correctly rounded (to nearest) value of remainder */
-/* of dividing x by y.                                                    */
-/* Assumption: Machine arithmetic operations are performed in             */
-/* round to nearest mode of IEEE 754 standard.                            */
-/*                                                                        */
-/* ************************************************************************/
+/* Remainder function, double version.
+   Copyright (C) 2008-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
 
-#include "endian.h"
-#include "mydefs.h"
-#include "urem.h"
 #include <math.h>
-#include <math_private.h>
-#include <fenv_private.h>
 #include <libm-alias-finite.h>
+#include "math_config.h"
 
-/**************************************************************************/
-/* An ultimate remainder routine. Given two IEEE double machine numbers x */
-/* ,y   it computes the correctly rounded (to nearest) value of remainder */
-/**************************************************************************/
 double
 __ieee754_remainder (double x, double y)
 {
-  double z, d, xx;
-  int4 kx, ky, n, nn, n1, m1, l;
-  mynumber u, t, w = { { 0, 0 } }, v = { { 0, 0 } }, ww = { { 0, 0 } }, r;
-  u.x = x;
-  t.x = y;
-  kx = u.i[HIGH_HALF] & 0x7fffffff; /* no sign  for x*/
-  t.i[HIGH_HALF] &= 0x7fffffff;   /*no sign for y */
-  ky = t.i[HIGH_HALF];
-  /*------ |x| < 2^1023  and   2^-970 < |y| < 2^1024 ------------------*/
-  if (kx < 0x7fe00000 && ky < 0x7ff00000 && ky >= 0x03500000)
+  uint64_t hx = asuint64 (x);
+  uint64_t hy = asuint64 (y);
+  uint64_t sx = hx >> 63;
+
+  hx &= ~SIGN_MASK;
+  hy &= ~SIGN_MASK;
+
+  /* Is |y| < 2^1023 (about DBL_MAX / 2), so that y + y cannot overflow?  */
+  y = fabs (y);
+  if (__glibc_likely (hy < UINT64_C (0x7fe0000000000000)))
     {
-      SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
-      if (kx + 0x00100000 < ky)
-	return x;
-      if ((kx - 0x01500000) < ky)
-	{
-	  z = x / t.x;
-	  v.i[HIGH_HALF] = t.i[HIGH_HALF];
-	  d = (z + big.x) - big.x;
-	  xx = (x - d * v.x) - d * (t.x - v.x);
-	  if (d - z != 0.5 && d - z != -0.5)
-	    return (xx != 0) ? xx : ((x > 0) ? ZERO.x : nZERO.x);
-	  else
-	    {
-	      if (fabs (xx) > 0.5 * t.x)
-		return (z > d) ? xx - t.x : xx + t.x;
-	      else
-		return xx;
-	    }
-	} /*    (kx<(ky+0x01500000))         */
-      else
+      /* x is Inf or NaN: return NaN.  A zero y is left to fmod.  */
+      if (__glibc_unlikely (hx >= EXPONENT_MASK))
+	return (x * y) / (x * y);
+
+      x = fabs (__ieee754_fmod (x, y + y));
+      if (x + x > y)
 	{
-	  r.x = 1.0 / t.x;
-	  n = t.i[HIGH_HALF];
-	  nn = (n & 0x7ff00000) + 0x01400000;
-	  w.i[HIGH_HALF] = n;
-	  ww.x = t.x - w.x;
-	  l = (kx - nn) & 0xfff00000;
-	  n1 = ww.i[HIGH_HALF];
-	  m1 = r.i[HIGH_HALF];
-	  while (l > 0)
-	    {
-	      r.i[HIGH_HALF] = m1 - l;
-	      z = u.x * r.x;
-	      w.i[HIGH_HALF] = n + l;
-	      ww.i[HIGH_HALF] = (n1) ? n1 + l : n1;
-	      d = (z + big.x) - big.x;
-	      u.x = (u.x - d * w.x) - d * ww.x;
-	      l = (u.i[HIGH_HALF] & 0x7ff00000) - nn;
-	    }
-	  r.i[HIGH_HALF] = m1;
-	  w.i[HIGH_HALF] = n;
-	  ww.i[HIGH_HALF] = n1;
-	  z = u.x * r.x;
-	  d = (z + big.x) - big.x;
-	  u.x = (u.x - d * w.x) - d * ww.x;
-	  if (fabs (u.x) < 0.5 * t.x)
-	    return (u.x != 0) ? u.x : ((x > 0) ? ZERO.x : nZERO.x);
-	  else
-	  if (fabs (u.x) > 0.5 * t.x)
-	    return (d > z) ? u.x + t.x : u.x - t.x;
-	  else
-	    {
-	      z = u.x / t.x; d = (z + big.x) - big.x;
-              return ((u.x - d * w.x) - d * ww.x);
-	    }
+	  x -= y;
+	  if (x + x >= y)
+	    x -= y;
+	  /* Make sure x is not -0 (x == 0.0 is also true for -0).  This
+	     can occur only when x = y and rounding is towards -Inf.  */
+	  else if (x == 0.0)
+	    x = 0.0;
 	}
-    } /*   (kx<0x7fe00000&&ky<0x7ff00000&&ky>=0x03500000)     */
+    }
   else
     {
-      if (kx < 0x7fe00000 && ky < 0x7ff00000 && (ky > 0 || t.i[LOW_HALF] != 0))
-	{
-	  y = fabs (y) * t128.x;
-	  z = __ieee754_remainder (x, y) * t128.x;
-	  z = __ieee754_remainder (z, y) * tm128.x;
-	  return z;
-	}
-      else
+      /* x is Inf/NaN or y is NaN (hy cannot be 0 in this branch).  */
+      if ((hx >= EXPONENT_MASK || (hy - 1) >= EXPONENT_MASK))
+	return (x * y) / (x * y);
+
+      x = fabs (x);
+      double y_half = y * 0.5;
+      if (x > y_half)
 	{
-	  if ((kx & 0x7ff00000) == 0x7fe00000 && ky < 0x7ff00000 &&
-              (ky > 0 || t.i[LOW_HALF] != 0))
-	    {
-	      y = fabs (y);
-	      z = 2.0 * __ieee754_remainder (0.5 * x, y);
-	      d = fabs (z);
-	      if (d <= fabs (d - y))
-		return z;
-	      else if (d == y)
-		return 0.0 * x;
-	      else
-		return (z > 0) ? z - y : z + y;
-	    }
-	  else /* if x is too big */
-	    {
-	      if (ky == 0 && t.i[LOW_HALF] == 0) /* y = 0 */
-		return (x * y) / (x * y);
-	      else if (kx >= 0x7ff00000         /* x not finite */
-		       || (ky > 0x7ff00000      /* y is NaN */
-			   || (ky == 0x7ff00000 && t.i[LOW_HALF] != 0)))
-		return (x * y) / (x * y);
-	      else
-		return x;
-	    }
+	  x -= y;
+	  if (x >= y_half)
+	    x -= y;
+	  else if (x == 0.0)
+	    x = 0.0;
 	}
     }
+
+  return sx ? -x : x;  /* Reapply the sign of the original x.  */
 }
 libm_alias_finite (__ieee754_remainder, __remainder)
diff --git a/sysdeps/ieee754/dbl-64/urem.h b/sysdeps/ieee754/dbl-64/urem.h
deleted file mode 100644
index 38e1fc3200..0000000000
--- a/sysdeps/ieee754/dbl-64/urem.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * IBM Accurate Mathematical Library
- * Copyright (C) 2001-2025 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-
-/************************************************************************/
-/*  MODULE_NAME: urem.h                                                 */
-/*                                                                      */
-/*                                                                      */
-/* 	common data and variables definition for BIG or LITTLE ENDIAN   */
-/************************************************************************/
-
-#ifndef UREM_H
-#define UREM_H
-
-#ifdef BIG_ENDI
-static const mynumber big = {{0x43380000, 0}},  /* 6755399441055744 */
-                     t128 = {{0x47f00000, 0}},  /*  2^ 128          */
-                    tm128 = {{0x37f00000, 0}},  /*  2^-128          */
-                      ZERO = {{0, 0}},          /*  0.0             */
-                     nZERO = {{0x80000000, 0}}; /* -0.0             */
-#else
-#ifdef LITTLE_ENDI
-static const mynumber big = {{0, 0x43380000}},  /* 6755399441055744 */
-                     t128 = {{0, 0x47f00000}},  /*  2^ 128          */
-                    tm128 = {{0, 0x37f00000}},  /*  2^-128          */
-                      ZERO = {{0, 0}},          /*  0.0             */
-                     nZERO = {{0, 0x80000000}}; /* -0.0             */
-#endif
-#endif
-
-#endif