From: Raymond Hettinger
Date: Mon, 29 Aug 2022 17:19:48 +0000 (-0500)
Subject: Improve accuracy for Spearman's rank correlation coefficient. (#96392)
X-Git-Tag: v3.12.0a1~532
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3d180e3ab21c5d41d1c46e3ef349b30ba409f300;p=thirdparty%2FPython%2Fcpython.git

Improve accuracy for Spearman's rank correlation coefficient. (#96392)
---

diff --git a/Lib/statistics.py b/Lib/statistics.py
index b4676fed5e28..b4adabd3f05a 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -379,7 +379,7 @@ def _rank(data, /, *, key=None, reverse=False, ties='average', start=1) -> list[
         [2.0, 1.0, 3.0]
 
     Ranks are conventionally numbered starting from one; however,
-    setting *start* to zero allow the ranks to be used as array indices:
+    setting *start* to zero allows the ranks to be used as array indices:
 
         >>> prize = ['Gold', 'Silver', 'Bronze', 'Certificate']
         >>> scores = [8.1, 7.3, 9.4, 8.3]
@@ -1073,8 +1073,9 @@ def correlation(x, y, /, *, method='linear'):
     if method not in {'linear', 'ranked'}:
         raise ValueError(f'Unknown method: {method!r}')
     if method == 'ranked':
-        x = _rank(x)
-        y = _rank(y)
+        start = (n - 1) / -2  # Center rankings around zero
+        x = _rank(x, start=start)
+        y = _rank(y, start=start)
     xbar = fsum(x) / n
     ybar = fsum(y) / n
     sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))