git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-44150: Support optional weights parameter for fmean() (GH-26175)
author: Raymond Hettinger <rhettinger@users.noreply.github.com>
Fri, 21 May 2021 03:22:26 +0000 (20:22 -0700)
committer: GitHub <noreply@github.com>
Fri, 21 May 2021 03:22:26 +0000 (20:22 -0700)
Doc/library/statistics.rst
Lib/statistics.py
Lib/test/test_statistics.py
Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst [new file with mode: 0644]

index a65c9840b8113ab29f02da3e2ca765fc5f45a746..fce4cffd8c69b1e90840b1aa10c79e4fd0bbc80c 100644 (file)
@@ -43,7 +43,7 @@ or sample.
 
 =======================  ===============================================================
 :func:`mean`             Arithmetic mean ("average") of data.
-:func:`fmean`            Fast, floating point arithmetic mean.
+:func:`fmean`            Fast, floating point arithmetic mean, with optional weighting.
 :func:`geometric_mean`   Geometric mean of data.
 :func:`harmonic_mean`    Harmonic mean of data.
 :func:`median`           Median (middle value) of data.
@@ -128,7 +128,7 @@ However, for reading convenience, most of the examples show sorted sequences.
       ``mean(data)`` is equivalent to calculating the true population mean μ.
 
 
-.. function:: fmean(data)
+.. function:: fmean(data, weights=None)
 
    Convert *data* to floats and compute the arithmetic mean.
 
@@ -141,8 +141,25 @@ However, for reading convenience, most of the examples show sorted sequences.
       >>> fmean([3.5, 4.0, 5.25])
       4.25
 
+   Optional weighting is supported.  For example, a professor assigns a
+   grade for a course by weighting quizzes at 20%, homework at 20%, a
+   midterm exam at 30%, and a final exam at 30%:
+
+   .. doctest::
+
+      >>> grades = [85, 92, 83, 91]
+      >>> weights = [0.20, 0.20, 0.30, 0.30]
+      >>> fmean(grades, weights)
+      87.6
+
+   If *weights* is supplied, it must be the same length as the *data* or
+   a :exc:`ValueError` will be raised.
+
    .. versionadded:: 3.8
 
+   .. versionchanged:: 3.11
+      Added support for *weights*.
+
 
 .. function:: geometric_mean(data)
 
index 5d38f855020f4335d40e67024dc82fcc40d56102..bd3813ce1a4f190016887142bed5c47241cdcf25 100644 (file)
@@ -136,7 +136,7 @@ from decimal import Decimal
 from itertools import groupby, repeat
 from bisect import bisect_left, bisect_right
 from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
-from operator import itemgetter
+from operator import itemgetter, mul
 from collections import Counter, namedtuple
 
 # === Exceptions ===
@@ -345,7 +345,7 @@ def mean(data):
     return _convert(total / n, T)
 
 
-def fmean(data):
+def fmean(data, weights=None):
     """Convert data to floats and compute the arithmetic mean.
 
     This runs faster than the mean() function and it always returns a float.
@@ -363,13 +363,24 @@ def fmean(data):
             nonlocal n
             for n, x in enumerate(iterable, start=1):
                 yield x
-        total = fsum(count(data))
-    else:
+        data = count(data)
+    if weights is None:
         total = fsum(data)
-    try:
+        if not n:
+            raise StatisticsError('fmean requires at least one data point')
         return total / n
-    except ZeroDivisionError:
-        raise StatisticsError('fmean requires at least one data point') from None
+    try:
+        num_weights = len(weights)
+    except TypeError:
+        weights = list(weights)
+        num_weights = len(weights)
+    num = fsum(map(mul, data, weights))
+    if n != num_weights:
+        raise StatisticsError('data and weights must be the same length')
+    den = fsum(weights)
+    if not den:
+        raise StatisticsError('sum of weights must be non-zero')
+    return num / den
 
 
 def geometric_mean(data):
index 70d269dea732dec30a40308e5ec3816f6ce3873b..3e6e17afe1c1b600b88d7f5c9464bfdcd179a413 100644 (file)
@@ -1972,6 +1972,27 @@ class TestFMean(unittest.TestCase):
         with self.assertRaises(ValueError):
             fmean([Inf, -Inf])
 
+    def test_weights(self):
+        fmean = statistics.fmean
+        StatisticsError = statistics.StatisticsError
+        self.assertEqual(
+            fmean([10, 10, 10, 50], [0.25] * 4),
+            fmean([10, 10, 10, 50]))
+        self.assertEqual(
+            fmean([10, 10, 20], [0.25, 0.25, 0.50]),
+            fmean([10, 10, 20, 20]))
+        self.assertEqual(                           # inputs are iterators
+            fmean(iter([10, 10, 20]), iter([0.25, 0.25, 0.50])),
+            fmean([10, 10, 20, 20]))
+        with self.assertRaises(StatisticsError):
+            fmean([10, 20, 30], [1, 2])             # unequal lengths
+        with self.assertRaises(StatisticsError):
+            fmean(iter([10, 20, 30]), iter([1, 2])) # unequal lengths
+        with self.assertRaises(StatisticsError):
+            fmean([10, 20], [-1, 1])                # sum of weights is zero
+        with self.assertRaises(StatisticsError):
+            fmean(iter([10, 20]), iter([-1, 1]))    # sum of weights is zero
+
 
 # === Tests for variances and standard deviations ===
 
diff --git a/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst b/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst
new file mode 100644 (file)
index 0000000..f4c2786
--- /dev/null
@@ -0,0 +1 @@
+Add optional *weights* argument to statistics.fmean().