bpo-45766: Add direct proportion option to linear_regression(). (#29490)

author Raymond Hettinger <rhettinger@users.noreply.github.com>

Sun, 21 Nov 2021 14:39:26 +0000 (08:39 -0600)

committer GitHub <noreply@github.com>

Sun, 21 Nov 2021 14:39:26 +0000 (08:39 -0600)
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Sun, 21 Nov 2021 14:39:26 +0000 (08:39 -0600)
committer GitHub <noreply@github.com>
Sun, 21 Nov 2021 14:39:26 +0000 (08:39 -0600)
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst

index bb03a2ce6ee971bb79768bff29d22018bee28edc..8638abfb697b85151b9b52a0bc097df6ca962e20 100644 (file)
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -643,7 +643,7 @@ However, for reading convenience, most of the examples show sorted sequences.
  
     .. versionadded:: 3.10
  
-.. function:: linear_regression(x, y, /)
+.. function:: linear_regression(x, y, /, *, proportional=False)
  
     Return the slope and intercept of `simple linear regression
     <https://en.wikipedia.org/wiki/Simple_linear_regression>`_
@@ -677,8 +677,18 @@ However, for reading convenience, most of the examples show sorted sequences.
        >>> round(slope * 2019 + intercept)
        16
  
+   If *proportional* is true, the independent variable *x* and the
+   dependent variable *y* are assumed to be directly proportional.
+   The data is fit to a line passing through the origin.
+   Since the *intercept* will always be 0.0, the underlying linear
+   function simplifies to:
+
+      *y = slope \* x + noise*
+
     .. versionadded:: 3.10
  
+   .. versionchanged:: 3.11
+      Added support for *proportional*.
  
  Exceptions
  ----------
diff --git a/Lib/statistics.py b/Lib/statistics.py

index 4f3ab49b4021933157cb602c105b3707d3af3da3..5c3f77df1549ddd1c6c6232409a81f55481bd607 100644 (file)
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -937,13 +937,13 @@ def correlation(x, y, /):
  LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept'))
  
  
-def linear_regression(x, y, /):
+def linear_regression(x, y, /, *, proportional=False):
      """Slope and intercept for simple linear regression.
  
      Return the slope and intercept of simple linear regression
      parameters estimated using ordinary least squares. Simple linear
      regression describes relationship between an independent variable
-    *x* and a dependent variable *y* in terms of linear function:
+    *x* and a dependent variable *y* in terms of a linear function:
  
          y = slope * x + intercept + noise
  
@@ -961,21 +961,38 @@ def linear_regression(x, y, /):
      >>> linear_regression(x, y)  #doctest: +ELLIPSIS
      LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
  
+    If *proportional* is true, the independent variable *x* and the
+    dependent variable *y* are assumed to be directly proportional.
+    The data is fit to a line passing through the origin.
+
+    Since the *intercept* will always be 0.0, the underlying linear
+    function simplifies to:
+
+        y = slope * x + noise
+
+    >>> y = [3 * x[i] + noise[i] for i in range(5)]
+    >>> linear_regression(x, y, proportional=True)  #doctest: +ELLIPSIS
+    LinearRegression(slope=3.02447542484..., intercept=0.0)
+
      """
      n = len(x)
      if len(y) != n:
          raise StatisticsError('linear regression requires that both inputs have same number of data points')
      if n < 2:
          raise StatisticsError('linear regression requires at least two data points')
-    xbar = fsum(x) / n
-    ybar = fsum(y) / n
-    sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
-    sxx = fsum((d := xi - xbar) * d for xi in x)
+    if proportional:
+        sxy = fsum(xi * yi for xi, yi in zip(x, y))
+        sxx = fsum(xi * xi for xi in x)
+    else:
+        xbar = fsum(x) / n
+        ybar = fsum(y) / n
+        sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
+        sxx = fsum((d := xi - xbar) * d for xi in x)
      try:
          slope = sxy / sxx   # equivalent to:  covariance(x, y) / variance(x)
      except ZeroDivisionError:
          raise StatisticsError('x is constant')
-    intercept = ybar - slope * xbar
+    intercept = 0.0 if proportional else ybar - slope * xbar
      return LinearRegression(slope=slope, intercept=intercept)
  
  
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py

index fbc6a071cfd34b652adc5642ceb199de31ab46aa..c0e427d9355f257b4bca877e8b544c06662dea2d 100644 (file)
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2527,6 +2527,12 @@ class TestLinearRegression(unittest.TestCase):
              self.assertAlmostEqual(intercept, true_intercept)
              self.assertAlmostEqual(slope, true_slope)
  
+    def test_proportional(self):
+        x = [10, 20, 30, 40]
+        y = [180, 398, 610, 799]
+        slope, intercept = statistics.linear_regression(x, y, proportional=True)
+        self.assertAlmostEqual(slope, 20 + 1/150)
+        self.assertEqual(intercept, 0.0)
  
  class TestNormalDist:
  
diff --git a/Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst b/Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst

new file mode 100644 (file)

index 0000000..b2e9c7e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst
@@ -0,0 +1 @@
+Added *proportional* option to :func:`statistics.linear_regression`.
author	Raymond Hettinger <rhettinger@users.noreply.github.com>
	Sun, 21 Nov 2021 14:39:26 +0000 (08:39 -0600)
committer	GitHub <noreply@github.com>
	Sun, 21 Nov 2021 14:39:26 +0000 (08:39 -0600)
Doc/library/statistics.rst		patch \| blob \| blame \| history
Lib/statistics.py		patch \| blob \| blame \| history
Lib/test/test_statistics.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst	[new file with mode: 0644]	patch \| blob