From: Raymond Hettinger
Date: Thu, 6 May 2021 14:43:13 +0000 (-0700)
Subject: Eliminate duplicated calculations and unnecessary work for linear regression (GH-25922)
X-Git-Tag: v3.11.0a1~1193
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=55b78ce3c4e23abe4f27bf16d7968f8851532e47;p=thirdparty%2FPython%2Fcpython.git

Eliminate duplicated calculations and unnecessary work for linear regression (GH-25922)
---

diff --git a/Lib/statistics.py b/Lib/statistics.py
index edb11c868c1c..db8c581068b7 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -952,11 +952,16 @@ def linear_regression(regressor, dependent_variable, /):
         raise StatisticsError('linear regression requires that both inputs have same number of data points')
     if n < 2:
         raise StatisticsError('linear regression requires at least two data points')
+    x, y = regressor, dependent_variable
+    xbar = fsum(x) / n
+    ybar = fsum(y) / n
+    sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
+    s2x = fsum((xi - xbar) ** 2.0 for xi in x)
     try:
-        slope = covariance(regressor, dependent_variable) / variance(regressor)
+        slope = sxy / s2x
     except ZeroDivisionError:
         raise StatisticsError('regressor is constant')
-    intercept = fmean(dependent_variable) - slope * fmean(regressor)
+    intercept = ybar - slope * xbar
     return LinearRegression(intercept=intercept, slope=slope)