git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Minor accuracy improvement for statistics.correlation() (GH-107781)
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Tue, 8 Aug 2023 16:12:52 +0000 (18:12 +0200)
committer GitHub <noreply@github.com>
Tue, 8 Aug 2023 16:12:52 +0000 (17:12 +0100)
Lib/statistics.py

index 6bd214bbfe2ff5976b19408b08d2fcdd9edc66fa..066669d25ddb1264df30b01555c79e2d73fdf628 100644 (file)
@@ -1004,6 +1004,14 @@ def _mean_stdev(data):
         # Handle Nans and Infs gracefully
         return float(xbar), float(xbar) / float(ss)
 
+def _sqrtprod(x: float, y: float) -> float:
+    "Return sqrt(x * y) computed with high accuracy (plain sqrt plus one correction step)."
+    # Square root differential correction, h + d / (2*h) with residual d = x*y - h*h:
+    # https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
+    h = sqrt(x * y)  # initial estimate from the directly computed product
+    x = sumprod((x, h), (y, -h))  # x is rebound to the residual x*y + h*(-h)
+    return h + x / (2.0 * h)  # raises ZeroDivisionError when h == 0.0
+
 
 # === Statistics for relations between two inputs ===
 
@@ -1083,7 +1091,7 @@ def correlation(x, y, /, *, method='linear'):
     sxx = sumprod(x, x)
     syy = sumprod(y, y)
     try:
-        return sxy / sqrt(sxx * syy)
+        return sxy / _sqrtprod(sxx, syy)
     except ZeroDivisionError:
         raise StatisticsError('at least one of the inputs is constant')