Fix powerpc software sqrt (bug 17964).

author Joseph Myers <joseph@codesourcery.com>

Thu, 12 Feb 2015 23:05:37 +0000 (23:05 +0000)

committer Aurelien Jarno <aurelien@aurel32.net>

Mon, 20 Feb 2017 21:04:52 +0000 (22:04 +0100)
author Joseph Myers <joseph@codesourcery.com>
Thu, 12 Feb 2015 23:05:37 +0000 (23:05 +0000)
committer Aurelien Jarno <aurelien@aurel32.net>
Mon, 20 Feb 2017 21:04:52 +0000 (22:04 +0100)
diff --git a/ChangeLog b/ChangeLog

index 92b8a2e35cc3b44c99fd59e71ad6816e3a5377e8..a81d6234a7f7ebb794752791ce3318dc6205c04b 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2015-02-12  Joseph Myers  <joseph@codesourcery.com>
+
+       [BZ #17964]
+       * sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Use
+       __builtin_fma instead of relying on contraction of a * b + c.
+
  2015-01-28  Adhemerval Zanellla  <azanella@linux.vnet.ibm.com>
  
         [BZ #16576]
diff --git a/NEWS b/NEWS

index f62b876d81fadaa6ea3c39721b8cfb866854c972..bdbf52bd7c05dd9bb321caa67f69c27173eb7a52 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -12,8 +12,8 @@ Version 2.19.1
    15946, 16009, 16545, 16574, 16576, 16623, 16657, 16695, 16743, 16758,
    16759, 16760, 16878, 16882, 16885, 16916, 16932, 16943, 16958, 17048,
    17062, 17069, 17079, 17137, 17153, 17213, 17263, 17269, 17325, 17523,
-  17555, 17905, 18007, 18032, 18080, 18240, 18287, 18508, 18665, 18905,
-  18928, 19018, 19779, 19791, 19879, 20010, 20112.
+  17555, 17905, 17964, 18007, 18032, 18080, 18240, 18287, 18508, 18665,
+  18905, 18928, 19018, 19779, 19791, 19879, 20010, 20112.
  
  * A buffer overflow in gethostbyname_r and related functions performing DNS
    requests has been fixed.  If the NSS functions were called with a
diff --git a/sysdeps/powerpc/fpu/e_sqrt.c b/sysdeps/powerpc/fpu/e_sqrt.c

index 24dfe686255fbbd0a23f1b981f1dbb2b82fe12f1..022d71bcb0eaf9acc4abd24d00038c4ae63e91e1 100644 (file)
--- a/sysdeps/powerpc/fpu/e_sqrt.c
+++ b/sysdeps/powerpc/fpu/e_sqrt.c
@@ -99,38 +99,41 @@ __slow_ieee754_sqrt (double x)
           /* Here we have three Newton-Raphson iterations each of a
              division and a square root and the remainder of the
              argument reduction, all interleaved.   */
-         sd = -(sg * sg - sx);
+         sd = -__builtin_fma (sg, sg, -sx);
           fsgi = (xi0 + 0x40000000) >> 1 & 0x7ff00000;
           sy2 = sy + sy;
-         sg = sy * sd + sg;    /* 16-bit approximation to sqrt(sx). */
+         sg = __builtin_fma (sy, sd, sg);      /* 16-bit approximation to
+                                                  sqrt(sx). */
  
           /* schedule the INSERT_WORDS (fsg, fsgi, 0) to get separation
              between the store and the load.  */
           INSERT_WORDS (fsg, fsgi, 0);
           iw_u.parts.msw = fsgi;
           iw_u.parts.lsw = (0);
-         e = -(sy * sg - almost_half);
-         sd = -(sg * sg - sx);
+         e = -__builtin_fma (sy, sg, -almost_half);
+         sd = -__builtin_fma (sg, sg, -sx);
           if ((xi0 & 0x7ff00000) == 0)
             goto denorm;
-         sy = sy + e * sy2;
-         sg = sg + sy * sd;    /* 32-bit approximation to sqrt(sx).  */
+         sy = __builtin_fma (e, sy2, sy);
+         sg = __builtin_fma (sy, sd, sg);      /* 32-bit approximation to
+                                                  sqrt(sx).  */
           sy2 = sy + sy;
           /* complete the INSERT_WORDS (fsg, fsgi, 0) operation.  */
           fsg = iw_u.value;
-         e = -(sy * sg - almost_half);
-         sd = -(sg * sg - sx);
-         sy = sy + e * sy2;
+         e = -__builtin_fma (sy, sg, -almost_half);
+         sd = -__builtin_fma (sg, sg, -sx);
+         sy = __builtin_fma (e, sy2, sy);
           shx = sx * fsg;
-         sg = sg + sy * sd;    /* 64-bit approximation to sqrt(sx),
-                                  but perhaps rounded incorrectly.  */
+         sg = __builtin_fma (sy, sd, sg);      /* 64-bit approximation to
+                                                  sqrt(sx), but perhaps
+                                                  rounded incorrectly.  */
           sy2 = sy + sy;
           g = sg * fsg;
-         e = -(sy * sg - almost_half);
-         d = -(g * sg - shx);
-         sy = sy + e * sy2;
+         e = -__builtin_fma (sy, sg, -almost_half);
+         d = -__builtin_fma (g, sg, -shx);
+         sy = __builtin_fma (e, sy2, sy);
           fesetenv_register (fe);
-         return g + sy * d;
+         return __builtin_fma (sy, d, g);
         denorm:
           /* For denormalised numbers, we normalise, calculate the
              square root, and return an adjusted result.  */
author	Joseph Myers <joseph@codesourcery.com>
	Thu, 12 Feb 2015 23:05:37 +0000 (23:05 +0000)
committer	Aurelien Jarno <aurelien@aurel32.net>
	Mon, 20 Feb 2017 21:04:52 +0000 (22:04 +0100)
ChangeLog		patch \| blob \| blame \| history
NEWS		patch \| blob \| blame \| history
sysdeps/powerpc/fpu/e_sqrt.c		patch \| blob \| blame \| history