GH-100425: Improve accuracy of builtin sum() for float inputs (GH-100426)

author Raymond Hettinger <rhettinger@users.noreply.github.com>

Fri, 23 Dec 2022 22:35:58 +0000 (14:35 -0800)

committer GitHub <noreply@github.com>

Fri, 23 Dec 2022 22:35:58 +0000 (14:35 -0800)
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Fri, 23 Dec 2022 22:35:58 +0000 (14:35 -0800)
committer GitHub <noreply@github.com>
Fri, 23 Dec 2022 22:35:58 +0000 (14:35 -0800)
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst

index 2110990d1889733e2a15d25889ab2e917498ace4..817c1f858aae8965f98a0f560a606853cdbfb52c 100644 (file)
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -1733,6 +1733,10 @@ are always available.  They are listed here in alphabetical order.
     .. versionchanged:: 3.8
        The *start* parameter can be specified as a keyword argument.
  
+   .. versionchanged:: 3.12 Summation of floats switched to an algorithm
+      that gives higher accuracy on most builds.
+
+
  .. class:: super()
             super(type, object_or_type=None)
  
diff --git a/Doc/library/math.rst b/Doc/library/math.rst

index 559c6ec5dd9d8a6f16dd0cb03146bc2e54bd74f0..aeebcaf6ab0864bde3d30076bffb9595c0796ba4 100644 (file)
--- a/Doc/library/math.rst
+++ b/Doc/library/math.rst
@@ -108,12 +108,7 @@ Number-theoretic and representation functions
  .. function:: fsum(iterable)
  
     Return an accurate floating point sum of values in the iterable.  Avoids
-   loss of precision by tracking multiple intermediate partial sums:
-
-        >>> sum([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1])
-        0.9999999999999999
-        >>> fsum([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1])
-        1.0
+   loss of precision by tracking multiple intermediate partial sums.
  
     The algorithm's accuracy depends on IEEE-754 arithmetic guarantees and the
     typical case where the rounding mode is half-even.  On some non-Windows
diff --git a/Doc/tutorial/floatingpoint.rst b/Doc/tutorial/floatingpoint.rst

index e1cd7f9ece75d0e730ae6e303aa650c416644034..cedade6e3366089e3efa93f54743bcac6403425c 100644 (file)
--- a/Doc/tutorial/floatingpoint.rst
+++ b/Doc/tutorial/floatingpoint.rst
@@ -192,7 +192,7 @@ added onto a running total.  That can make a difference in overall accuracy
  so that the errors do not accumulate to the point where they affect the
  final total:
  
-   >>> sum([0.1] * 10) == 1.0
+   >>> 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 == 1.0
     False
     >>> math.fsum([0.1] * 10) == 1.0
     True
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py

index eb1c389257cc4b536a513db632513c55c9283ee2..c65600483258a790cdde69223d2150749e61bcc6 100644 (file)
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -9,6 +9,7 @@ import fractions
  import gc
  import io
  import locale
+import math
  import os
  import pickle
  import platform
@@ -31,6 +32,7 @@ from test.support import (swap_attr, maybe_get_event_loop_policy)
  from test.support.os_helper import (EnvironmentVarGuard, TESTFN, unlink)
  from test.support.script_helper import assert_python_ok
  from test.support.warnings_helper import check_warnings
+from test.support import requires_IEEE_754
  from unittest.mock import MagicMock, patch
  try:
      import pty, signal
@@ -38,6 +40,12 @@ except ImportError:
      pty = signal = None
  
  
+# Detect evidence of double-rounding: sum() does not always
+# get improved accuracy on machines that suffer from double rounding.
+x, y = 1e16, 2.9999 # use temporary values to defeat peephole optimizer
+HAVE_DOUBLE_ROUNDING = (x + y == 1e16 + 4)
+
+
  class Squares:
  
      def __init__(self, max):
@@ -1617,6 +1625,8 @@ class BuiltinTest(unittest.TestCase):
          self.assertEqual(repr(sum([-0.0])), '0.0')
          self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0')
          self.assertEqual(repr(sum([], -0.0)), '-0.0')
+        self.assertTrue(math.isinf(sum([float("inf"), float("inf")])))
+        self.assertTrue(math.isinf(sum([1e308, 1e308])))
  
          self.assertRaises(TypeError, sum)
          self.assertRaises(TypeError, sum, 42)
@@ -1641,6 +1651,14 @@ class BuiltinTest(unittest.TestCase):
          sum(([x] for x in range(10)), empty)
          self.assertEqual(empty, [])
  
+    @requires_IEEE_754
+    @unittest.skipIf(HAVE_DOUBLE_ROUNDING,
+                         "sum accuracy not guaranteed on machines with double rounding")
+    @support.cpython_only    # Other implementations may choose a different algorithm
+    def test_sum_accuracy(self):
+        self.assertEqual(sum([0.1] * 10), 1.0)
+        self.assertEqual(sum([1.0, 10E100, 1.0, -10E100]), 2.0)
+
      def test_type(self):
          self.assertEqual(type(''),  type('123'))
          self.assertNotEqual(type(''), type(()))
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst

new file mode 100644 (file)

index 0000000..5559020
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst
@@ -0,0 +1 @@
+Improve the accuracy of ``sum()`` with compensated summation.
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c

index ff96c25da5ebc6e0a0edc78c2e2fe380f7a99dce..2d4822e6d468aaf5894c3e2c3e3dce0e45c67853 100644 (file)
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -2532,6 +2532,7 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start)
  
      if (PyFloat_CheckExact(result)) {
          double f_result = PyFloat_AS_DOUBLE(result);
+        double c = 0.0;
          Py_SETREF(result, NULL);
          while(result == NULL) {
              item = PyIter_Next(iter);
@@ -2539,10 +2540,25 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start)
                  Py_DECREF(iter);
                  if (PyErr_Occurred())
                      return NULL;
+                /* Avoid losing the sign on a negative result,
+                   and don't let adding the compensation convert
+                   an infinite or overflowed sum to a NaN. */
+                if (c && Py_IS_FINITE(c)) {
+                    f_result += c;
+                }
                  return PyFloat_FromDouble(f_result);
              }
              if (PyFloat_CheckExact(item)) {
-                f_result += PyFloat_AS_DOUBLE(item);
+                // Improved Kahan–Babuška algorithm by Arnold Neumaier
+                // https://www.mat.univie.ac.at/~neum/scan/01.pdf
+                double x = PyFloat_AS_DOUBLE(item);
+                double t = f_result + x;
+                if (fabs(f_result) >= fabs(x)) {
+                    c += (f_result - t) + x;
+                } else {
+                    c += (x - t) + f_result;
+                }
+                f_result = t;
                  _Py_DECREF_SPECIALIZED(item, _PyFloat_ExactDealloc);
                  continue;
              }
@@ -2556,6 +2572,9 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start)
                      continue;
                  }
              }
+            if (c && Py_IS_FINITE(c)) {
+                f_result += c;
+            }
              result = PyFloat_FromDouble(f_result);
              if (result == NULL) {
                  Py_DECREF(item);
author	Raymond Hettinger <rhettinger@users.noreply.github.com>
	Fri, 23 Dec 2022 22:35:58 +0000 (14:35 -0800)
committer	GitHub <noreply@github.com>
	Fri, 23 Dec 2022 22:35:58 +0000 (14:35 -0800)
Doc/library/functions.rst		patch \| blob \| blame \| history
Doc/library/math.rst		patch \| blob \| blame \| history
Doc/tutorial/floatingpoint.rst		patch \| blob \| blame \| history
Lib/test/test_builtin.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst	[new file with mode: 0644]	patch \| blob
Python/bltinmodule.c		patch \| blob \| blame \| history