From: Raymond Hettinger
Date: Tue, 8 Aug 2023 16:12:52 +0000 (+0200)
Subject: Minor accuracy improvement for statistics.correlation() (GH-107781)
X-Git-Tag: v3.13.0a1~1028
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d4ac094cf9d15ec5705ec0fe8771df9e6ba915b9;p=thirdparty%2FPython%2Fcpython.git

Minor accuracy improvement for statistics.correlation() (GH-107781)
---

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 6bd214bbfe2f..066669d25ddb 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1004,6 +1004,14 @@ def _mean_stdev(data):
         # Handle Nans and Infs gracefully
         return float(xbar), float(xbar) / float(ss)
 
+def _sqrtprod(x: float, y: float) -> float:
+    "Return sqrt(x * y) computed with high accuracy."
+    # Square root differential correction:
+    # https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
+    h = sqrt(x * y)
+    x = sumprod((x, h), (y, -h))
+    return h + x / (2.0 * h)
+
 
 # === Statistics for relations between two inputs ===
 
@@ -1083,7 +1091,7 @@ def correlation(x, y, /, *, method='linear'):
     sxx = sumprod(x, x)
     syy = sumprod(y, y)
     try:
-        return sxy / sqrt(sxx * syy)
+        return sxy / _sqrtprod(sxx, syy)
     except ZeroDivisionError:
         raise StatisticsError('at least one of the inputs is constant')
 