GH-100485: Create an alternative code path when an accurate fma() implementation...

author Raymond Hettinger <rhettinger@users.noreply.github.com>

Sat, 4 Feb 2023 23:54:44 +0000 (17:54 -0600)

committer GitHub <noreply@github.com>

Sat, 4 Feb 2023 23:54:44 +0000 (17:54 -0600)
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Sat, 4 Feb 2023 23:54:44 +0000 (17:54 -0600)
committer GitHub <noreply@github.com>
Sat, 4 Feb 2023 23:54:44 +0000 (17:54 -0600)
diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py

index 2c84e55ab45c57df245cb6d76cdd43b10d2048b0..8d849045b2d11f9d06471a65c42396747ac0bd33 100644 (file)
--- a/Lib/test/test_math.py
+++ b/Lib/test/test_math.py
@@ -1450,6 +1450,11 @@ class MathTests(unittest.TestCase):
          n = 20                # Length of vectors
          c = 1e30              # Target condition number
  
+        # If the following test fails, it means that the C math library
+        # implementation of fma() is not compliant with the C99 standard
+        # and is inaccurate.  To solve this problem, make a new build
+        # with the symbol UNRELIABLE_FMA defined.  That will enable a
+        # slower but accurate code path that avoids the fma() call.
          relative_err = median(Trial(math.sumprod, c, n) for i in range(times))
          self.assertLess(relative_err, 1e-16)
  
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c

index e6cdb3bae1ecff19a468836af9c3f0a2635b77e3..654336d6d9f4bc3de643587df20f89d5ebcc09f1 100644 (file)
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -2851,6 +2851,8 @@ dl_sum(double a, double b)
      return (DoubleLength) {x, y};
  }
  
+#ifndef UNRELIABLE_FMA
+
  static DoubleLength
  dl_mul(double x, double y)
  {
@@ -2860,6 +2862,47 @@ dl_mul(double x, double y)
      return (DoubleLength) {z, zz};
  }
  
+#else
+
+/*
+   The default implementation of dl_mul() depends on the C math library
+   having an accurate fma() function as required by § 7.12.13.1 of the
+   C99 standard.
+
+   The UNRELIABLE_FMA option is provided as a slower but accurate
+   alternative for builds where the fma() function is found wanting.
+   The speed penalty may be modest (17% slower on an Apple M1 Max),
+   so don't hesitate to enable this build option.
+
+   The algorithms are from the T. J. Dekker paper:
+   A Floating-Point Technique for Extending the Available Precision
+   https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
+*/
+
+static DoubleLength
+dl_split(double x) {
+    // Dekker (5.5) and (5.6).
+    double t = x * 134217729.0;  // Veltkamp constant = 2.0 ** 27 + 1
+    double hi = t - (t - x);
+    double lo = x - hi;
+    return (DoubleLength) {hi, lo};
+}
+
+static DoubleLength
+dl_mul(double x, double y)
+{
+    // Dekker (5.12) and mul12()
+    DoubleLength xx = dl_split(x);
+    DoubleLength yy = dl_split(y);
+    double p = xx.hi * yy.hi;
+    double q = xx.hi * yy.lo + xx.lo * yy.hi;
+    double z = p + q;
+    double zz = p - z + q + xx.lo * yy.lo;
+    return (DoubleLength) {z, zz};
+}
+
+#endif
+
  typedef struct { double hi; double lo; double tiny; } TripleLength;
  
  static const TripleLength tl_zero = {0.0, 0.0, 0.0};
author	Raymond Hettinger <rhettinger@users.noreply.github.com>
	Sat, 4 Feb 2023 23:54:44 +0000 (17:54 -0600)
committer	GitHub <noreply@github.com>
	Sat, 4 Feb 2023 23:54:44 +0000 (17:54 -0600)
Lib/test/test_math.py		patch \| blob \| blame \| history
Modules/mathmodule.c		patch \| blob \| blame \| history