gh-110150: Fix base case handling in quantiles() (gh-110151)

author Raymond Hettinger <rhettinger@users.noreply.github.com>

Sun, 1 Oct 2023 04:35:54 +0000 (23:35 -0500)

committer GitHub <noreply@github.com>

Sun, 1 Oct 2023 04:35:54 +0000 (23:35 -0500)
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Sun, 1 Oct 2023 04:35:54 +0000 (23:35 -0500)
committer GitHub <noreply@github.com>
Sun, 1 Oct 2023 04:35:54 +0000 (23:35 -0500)
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst

index f3c1bf20ae3ac874a514ede78f5358f24cff58b0..5c8ad3a7dd73803ac5df2d6dbfad348b61cb70be 100644 (file)
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -585,7 +585,7 @@ However, for reading convenience, most of the examples show sorted sequences.
  
     The *data* can be any iterable containing sample data.  For meaningful
     results, the number of data points in *data* should be larger than *n*.
-   Raises :exc:`StatisticsError` if there are not at least two data points.
+   Raises :exc:`StatisticsError` if there is not at least one data point.
  
     The cut points are linearly interpolated from the
     two nearest data points.  For example, if a cut point falls one-third
@@ -625,6 +625,11 @@ However, for reading convenience, most of the examples show sorted sequences.
  
     .. versionadded:: 3.8
  
+   .. versionchanged:: 3.13
+      No longer raises an exception for an input with only a single data point.
+      This allows quantile estimates to be built up one sample point
+      at a time, becoming gradually more refined with each new data point.
+
  .. function:: covariance(x, y, /)
  
     Return the sample covariance of two inputs *x* and *y*. Covariance
diff --git a/Lib/statistics.py b/Lib/statistics.py

index 96c803483057e77654d5fcc34f7b163d38723d8a..4da06889c6db4691c160a12929853fe0e2d8d749 100644 (file)
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -844,7 +844,9 @@ def quantiles(data, *, n=4, method='exclusive'):
      data = sorted(data)
      ld = len(data)
      if ld < 2:
-        raise StatisticsError('must have at least two data points')
+        if ld == 1:
+            return data * (n - 1)
+        raise StatisticsError('must have at least one data point')
      if method == 'inclusive':
          m = ld - 1
          result = []
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py

index f9b0ac2ad7b11610fc39aec00683c026ab4de69e..b24fc3c3d077fe4c99319d05539985149f1c6d4d 100644 (file)
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2454,6 +2454,11 @@ class TestQuantiles(unittest.TestCase):
              data = random.choices(range(100), k=k)
              q1, q2, q3 = quantiles(data, method='inclusive')
              self.assertEqual(q2, statistics.median(data))
+        # Base case with a single data point:  When estimating quantiles from
+        # a sample, we want to be able to add one sample point at a time,
+        # getting increasingly better estimates.
+        self.assertEqual(quantiles([10], n=4), [10.0, 10.0, 10.0])
+        self.assertEqual(quantiles([10], n=4, method='exclusive'), [10.0, 10.0, 10.0])
  
      def test_equal_inputs(self):
          quantiles = statistics.quantiles
@@ -2504,7 +2509,7 @@ class TestQuantiles(unittest.TestCase):
          with self.assertRaises(ValueError):
              quantiles([10, 20, 30], method='X') # method is unknown
          with self.assertRaises(StatisticsError):
-            quantiles([10], n=4)                # not enough data points
+            quantiles([], n=4)                  # not enough data points
          with self.assertRaises(TypeError):
              quantiles([10, None, 30], n=4)      # data is non-numeric
  
diff --git a/Misc/NEWS.d/next/Library/2023-09-30-12-50-47.gh-issue-110150.9j0Ij5.rst b/Misc/NEWS.d/next/Library/2023-09-30-12-50-47.gh-issue-110150.9j0Ij5.rst

new file mode 100644 (file)

index 0000000..3c4dde5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-09-30-12-50-47.gh-issue-110150.9j0Ij5.rst
@@ -0,0 +1,2 @@
+Fix base case handling in statistics.quantiles.  Now allows a single data
+point.
author	Raymond Hettinger <rhettinger@users.noreply.github.com>
	Sun, 1 Oct 2023 04:35:54 +0000 (23:35 -0500)
committer	GitHub <noreply@github.com>
	Sun, 1 Oct 2023 04:35:54 +0000 (23:35 -0500)
Doc/library/statistics.rst		patch | blob | blame | history
Lib/statistics.py		patch | blob | blame | history
Lib/test/test_statistics.py		patch | blob | blame | history
Misc/NEWS.d/next/Library/2023-09-30-12-50-47.gh-issue-110150.9j0Ij5.rst	[new file with mode: 0644]	patch | blob