bpo-41513: Improve speed and accuracy of math.hypot() (GH-21803)

author Raymond Hettinger <rhettinger@users.noreply.github.com>

Sun, 16 Aug 2020 02:38:19 +0000 (19:38 -0700)

committer GitHub <noreply@github.com>

Sun, 16 Aug 2020 02:38:19 +0000 (19:38 -0700)
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Sun, 16 Aug 2020 02:38:19 +0000 (19:38 -0700)
committer GitHub <noreply@github.com>
Sun, 16 Aug 2020 02:38:19 +0000 (19:38 -0700)
diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py

index e06b1e6a5b9b7c41fb1782cdde7618ad43a30e23..4d62eb1b119930dec7147424a8cccee0c7a44df2 100644 (file)
--- a/Lib/test/test_math.py
+++ b/Lib/test/test_math.py
@@ -795,7 +795,8 @@ class MathTests(unittest.TestCase):
          # Verify scaling for extremely large values
          fourthmax = FLOAT_MAX / 4.0
          for n in range(32):
-            self.assertEqual(hypot(*([fourthmax]*n)), fourthmax * math.sqrt(n))
+            self.assertTrue(math.isclose(hypot(*([fourthmax]*n)),
+                                         fourthmax * math.sqrt(n)))
  
          # Verify scaling for extremely small values
          for exp in range(32):
@@ -904,8 +905,8 @@ class MathTests(unittest.TestCase):
          for n in range(32):
              p = (fourthmax,) * n
              q = (0.0,) * n
-            self.assertEqual(dist(p, q), fourthmax * math.sqrt(n))
-            self.assertEqual(dist(q, p), fourthmax * math.sqrt(n))
+            self.assertTrue(math.isclose(dist(p, q), fourthmax * math.sqrt(n)))
+            self.assertTrue(math.isclose(dist(q, p), fourthmax * math.sqrt(n)))
  
          # Verify scaling for extremely small values
          for exp in range(32):
diff --git a/Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst b/Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst

new file mode 100644 (file)

index 0000000..cfb9f98
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst
@@ -0,0 +1,2 @@
+Minor algorithmic improvement to math.hypot() and math.dist() giving small
+gains in speed and accuracy.
diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c

index 411c6eb1935fab1530ddf6c292a0d540590c9da5..489802cc3674509e746e6266224223123358a93f 100644 (file)
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@@ -2406,6 +2406,13 @@ math_fmod_impl(PyObject *module, double x, double y)
  /*
  Given an *n* length *vec* of values and a value *max*, compute:
  
+    sqrt(sum((x * scale) ** 2 for x in vec)) / scale
+
+           where scale is the first power of two
+           greater than max.
+
+or compute:
+
      max * sqrt(sum((x / max) ** 2 for x in vec))
  
  The value of the *max* variable must be non-negative and
@@ -2425,19 +2432,25 @@ The *csum* variable tracks the cumulative sum and *frac* tracks
  the cumulative fractional errors at each step.  Since this
  variant assumes that |csum| >= |x| at each step, we establish
  the precondition by starting the accumulation from 1.0 which
-represents the largest possible value of (x/max)**2.
+represents the largest possible value of (x*scale)**2 or (x/max)**2.
  
  After the loop is finished, the initial 1.0 is subtracted out
  for a net zero effect on the final sum.  Since *csum* will be
  greater than 1.0, the subtraction of 1.0 will not cause
  fractional digits to be dropped from *csum*.
  
+To get the full benefit from compensated summation, the
+largest addend should be in the range:   0.5 <= x <= 1.0.
+Accordingly, scaling or division by *max* should not be skipped
+even if not otherwise needed to prevent overflow or loss of precision.
+
  */
  
  static inline double
  vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
  {
-    double x, csum = 1.0, oldcsum, frac = 0.0;
+    double x, csum = 1.0, oldcsum, frac = 0.0, scale;
+    int max_e;
      Py_ssize_t i;
  
      if (Py_IS_INFINITY(max)) {
@@ -2449,14 +2462,36 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
      if (max == 0.0 || n <= 1) {
          return max;
      }
+    frexp(max, &max_e);
+    if (max_e >= -1023) {
+        scale = ldexp(1.0, -max_e);
+        assert(max * scale >= 0.5);
+        assert(max * scale < 1.0);
+        for (i=0 ; i < n ; i++) {
+            x = vec[i];
+            assert(Py_IS_FINITE(x) && fabs(x) <= max);
+            x *= scale;
+            x = x*x;
+            assert(x <= 1.0);
+            assert(csum >= x);
+            oldcsum = csum;
+            csum += x;
+            frac += (oldcsum - csum) + x;
+        }
+        return sqrt(csum - 1.0 + frac) / scale;
+    }
+    /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
+       So instead of multiplying by a scale, we just divide by *max*.
+    */
      for (i=0 ; i < n ; i++) {
          x = vec[i];
          assert(Py_IS_FINITE(x) && fabs(x) <= max);
          x /= max;
          x = x*x;
+        assert(x <= 1.0);
+        assert(csum >= x);
          oldcsum = csum;
          csum += x;
-        assert(csum >= x);
          frac += (oldcsum - csum) + x;
      }
      return max * sqrt(csum - 1.0 + frac);
author	Raymond Hettinger <rhettinger@users.noreply.github.com>
	Sun, 16 Aug 2020 02:38:19 +0000 (19:38 -0700)
committer	GitHub <noreply@github.com>
	Sun, 16 Aug 2020 02:38:19 +0000 (19:38 -0700)
Lib/test/test_math.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2020-08-09-18-16-05.bpo-41513.e6K6EK.rst	[new file with mode: 0644]	patch \| blob
Modules/mathmodule.c		patch \| blob \| blame \| history