git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Eliminate duplicated calculations and unnecessary work for linear regression (GH...
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Thu, 6 May 2021 14:43:13 +0000 (07:43 -0700)
committer GitHub <noreply@github.com>
Thu, 6 May 2021 14:43:13 +0000 (07:43 -0700)
Lib/statistics.py

index edb11c868c1c8794569630229e2f44be76574d24..db8c581068b7ddedf1aed16ce304bb97dc19903f 100644 (file)
@@ -952,11 +952,16 @@ def linear_regression(regressor, dependent_variable, /):
         raise StatisticsError('linear regression requires that both inputs have same number of data points')
     if n < 2:
         raise StatisticsError('linear regression requires at least two data points')
+    x, y = regressor, dependent_variable
+    xbar = fsum(x) / n
+    ybar = fsum(y) / n
+    sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
+    s2x = fsum((xi - xbar) ** 2.0 for xi in x)
     try:
-        slope = covariance(regressor, dependent_variable) / variance(regressor)
+        slope = sxy / s2x
     except ZeroDivisionError:
         raise StatisticsError('regressor is constant')
-    intercept = fmean(dependent_variable) - slope * fmean(regressor)
+    intercept = ybar - slope * xbar
     return LinearRegression(intercept=intercept, slope=slope)