bpo-44150: Support optional weights parameter for fmean() (GH-26175)

author Raymond Hettinger <rhettinger@users.noreply.github.com>

Fri, 21 May 2021 03:22:26 +0000 (20:22 -0700)

committer GitHub <noreply@github.com>

Fri, 21 May 2021 03:22:26 +0000 (20:22 -0700)
author Raymond Hettinger <rhettinger@users.noreply.github.com>
Fri, 21 May 2021 03:22:26 +0000 (20:22 -0700)
committer GitHub <noreply@github.com>
Fri, 21 May 2021 03:22:26 +0000 (20:22 -0700)
diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst

index a65c9840b8113ab29f02da3e2ca765fc5f45a746..fce4cffd8c69b1e90840b1aa10c79e4fd0bbc80c 100644 (file)
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -43,7 +43,7 @@ or sample.
  
  =======================  ===============================================================
  :func:`mean`             Arithmetic mean ("average") of data.
-:func:`fmean`            Fast, floating point arithmetic mean.
+:func:`fmean`            Fast, floating point arithmetic mean, with optional weighting.
  :func:`geometric_mean`   Geometric mean of data.
  :func:`harmonic_mean`    Harmonic mean of data.
  :func:`median`           Median (middle value) of data.
@@ -128,7 +128,7 @@ However, for reading convenience, most of the examples show sorted sequences.
        ``mean(data)`` is equivalent to calculating the true population mean μ.
  
  
-.. function:: fmean(data)
+.. function:: fmean(data, weights=None)
  
     Convert *data* to floats and compute the arithmetic mean.
  
@@ -141,8 +141,25 @@ However, for reading convenience, most of the examples show sorted sequences.
        >>> fmean([3.5, 4.0, 5.25])
        4.25
  
+   Optional weighting is supported.  For example, a professor assigns a
+   grade for a course by weighting quizzes at 20%, homework at 20%, a
+   midterm exam at 30%, and a final exam at 30%:
+
+   .. doctest::
+
+      >>> grades = [85, 92, 83, 91]
+      >>> weights = [0.20, 0.20, 0.30, 0.30]
+      >>> fmean(grades, weights)
+      87.6
+
+   If *weights* is supplied, it must be the same length as *data*, or a
+   :exc:`StatisticsError` (a subclass of :exc:`ValueError`) is raised.
+
     .. versionadded:: 3.8
  
+   .. versionchanged:: 3.11
+      Added support for *weights*.
+
  
  .. function:: geometric_mean(data)
  
diff --git a/Lib/statistics.py b/Lib/statistics.py

index 5d38f855020f4335d40e67024dc82fcc40d56102..bd3813ce1a4f190016887142bed5c47241cdcf25 100644 (file)
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -136,7 +136,7 @@ from decimal import Decimal
  from itertools import groupby, repeat
  from bisect import bisect_left, bisect_right
  from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
-from operator import itemgetter
+from operator import itemgetter, mul
  from collections import Counter, namedtuple
  
  # === Exceptions ===
@@ -345,7 +345,7 @@ def mean(data):
      return _convert(total / n, T)
  
  
-def fmean(data):
+def fmean(data, weights=None):
      """Convert data to floats and compute the arithmetic mean.
  
      This runs faster than the mean() function and it always returns a float.
@@ -363,13 +363,24 @@ def fmean(data):
              nonlocal n
              for n, x in enumerate(iterable, start=1):
                  yield x
-        total = fsum(count(data))
-    else:
+        data = count(data)
+    if weights is None:
          total = fsum(data)
-    try:
+        if not n:
+            raise StatisticsError('fmean requires at least one data point')
          return total / n
-    except ZeroDivisionError:
-        raise StatisticsError('fmean requires at least one data point') from None
+    try:
+        num_weights = len(weights)
+    except TypeError:
+        weights = list(weights)
+        num_weights = len(weights)
+    num = fsum(map(mul, data, weights))
+    if n != num_weights:
+        raise StatisticsError('data and weights must be the same length')
+    den = fsum(weights)
+    if not den:
+        raise StatisticsError('sum of weights must be non-zero')
+    return num / den
  
  
  def geometric_mean(data):
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py

index 70d269dea732dec30a40308e5ec3816f6ce3873b..3e6e17afe1c1b600b88d7f5c9464bfdcd179a413 100644 (file)
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -1972,6 +1972,27 @@ class TestFMean(unittest.TestCase):
          with self.assertRaises(ValueError):
              fmean([Inf, -Inf])
  
+    def test_weights(self):
+        fmean = statistics.fmean
+        StatisticsError = statistics.StatisticsError
+        self.assertEqual(
+            fmean([10, 10, 10, 50], [0.25] * 4),
+            fmean([10, 10, 10, 50]))
+        self.assertEqual(
+            fmean([10, 10, 20], [0.25, 0.25, 0.50]),
+            fmean([10, 10, 20, 20]))
+        self.assertEqual(                           # inputs are iterators
+            fmean(iter([10, 10, 20]), iter([0.25, 0.25, 0.50])),
+            fmean([10, 10, 20, 20]))
+        with self.assertRaises(StatisticsError):
+            fmean([10, 20, 30], [1, 2])             # unequal lengths
+        with self.assertRaises(StatisticsError):
+            fmean(iter([10, 20, 30]), iter([1, 2])) # unequal lengths
+        with self.assertRaises(StatisticsError):
+            fmean([10, 20], [-1, 1])                # sum of weights is zero
+        with self.assertRaises(StatisticsError):
+            fmean(iter([10, 20]), iter([-1, 1]))    # sum of weights is zero
+
  
  # === Tests for variances and standard deviations ===
  
diff --git a/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst b/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst

new file mode 100644 (file)

index 0000000..f4c2786
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst
@@ -0,0 +1 @@
+Add optional *weights* argument to statistics.fmean().
author	Raymond Hettinger <rhettinger@users.noreply.github.com>
	Fri, 21 May 2021 03:22:26 +0000 (20:22 -0700)
committer	GitHub <noreply@github.com>
	Fri, 21 May 2021 03:22:26 +0000 (20:22 -0700)
Doc/library/statistics.rst		patch \| blob \| blame \| history
Lib/statistics.py		patch \| blob \| blame \| history
Lib/test/test_statistics.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst	[new file with mode: 0644]	patch \| blob