git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.14] Additional itertool recipes for running statistics (gh-148879) (gh-148949)
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Fri, 24 Apr 2026 05:52:35 +0000 (07:52 +0200)
committer: GitHub <noreply@github.com>
Fri, 24 Apr 2026 05:52:35 +0000 (05:52 +0000)
Doc/library/itertools.rst

index 1be8406bbdf0c14f0e60743b39a5d84b3a5d5dcc..8bfe5ac31e8990d8e81044a74a69bedc58dbc173 100644 (file)
@@ -836,6 +836,7 @@ and :term:`generators <generator>` which incur interpreter overhead.
    from collections import Counter, deque
    from contextlib import suppress
    from functools import reduce
+   from heapq import heappush, heappushpop, heappush_max, heappushpop_max
    from math import comb, isqrt, prod, sumprod
    from operator import getitem, is_not, itemgetter, mul, neg, truediv
 
@@ -851,11 +852,6 @@ and :term:`generators <generator>` which incur interpreter overhead.
        # prepend(1, [2, 3, 4]) → 1 2 3 4
        return chain([value], iterable)
 
-   def running_mean(iterable):
-       "Yield the average of all values seen so far."
-       # running_mean([8.5, 9.5, 7.5, 6.5]) → 8.5 9.0 8.5 8.0
-       return map(truediv, accumulate(iterable), count(1))
-
    def repeatfunc(function, times=None, *args):
        "Repeat calls to a function with specified arguments."
        if times is None:
@@ -1153,6 +1149,49 @@ and :term:`generators <generator>` which incur interpreter overhead.
        return n
 
 
+   # ==== Running statistics ====
+
+   def running_mean(iterable):
+       "Average of values seen so far."
+       # running_mean([37, 33, 38, 28]) → 37.0 35.0 36.0 34.0
+       return map(truediv, accumulate(iterable), count(1))
+
+   def running_min(iterable):
+       "Smallest of values seen so far."
+       # running_min([37, 33, 38, 28]) → 37 33 33 28
+       return accumulate(iterable, func=min)
+
+   def running_max(iterable):
+       "Largest of values seen so far."
+       # running_max([37, 33, 38, 28]) → 37 37 38 38
+       return accumulate(iterable, func=max)
+
+   def running_median(iterable):
+       "Median of values seen so far."
+       # running_median([37, 33, 38, 28]) → 37 35.0 37 35.0
+       read = iter(iterable).__next__
+       lo = []  # max-heap of the lower half; lo[0] is its largest value
+       hi = []  # min-heap of the upper half; same size as lo or one smaller
+       with suppress(StopIteration):
+           while True:
+               heappush_max(lo, heappushpop(hi, read()))
+               yield lo[0]
+               heappush(hi, heappushpop_max(lo, read()))
+               yield (lo[0] + hi[0]) / 2
+
+   def running_statistics(iterable):
+       "Aggregate statistics for values seen so far."
+       # Yields one tuple per input:  (size, minimum, median, maximum, mean)
+       t0, t1, t2, t3 = tee(iterable, 4)
+       return zip(
+           count(1),
+           running_min(t0),
+           running_median(t1),
+           running_max(t2),
+           running_mean(t3),
+       )
+
+
 .. doctest::
     :hide:
 
@@ -1229,10 +1268,6 @@ and :term:`generators <generator>` which incur interpreter overhead.
     [(0, 'a'), (1, 'b'), (2, 'c')]
 
 
-    >>> list(running_mean([8.5, 9.5, 7.5, 6.5]))
-    [8.5, 9.0, 8.5, 8.0]
-
-
     >>> for _ in loops(5):
     ...     print('hi')
     ...
@@ -1792,6 +1827,28 @@ and :term:`generators <generator>` which incur interpreter overhead.
     True
 
 
+    >>> list(running_mean([8.5, 9.5, 7.5, 6.5]))
+    [8.5, 9.0, 8.5, 8.0]
+    >>> list(running_mean([37, 33, 38, 28]))
+    [37.0, 35.0, 36.0, 34.0]
+
+
+    >>> list(running_min([37, 33, 38, 28]))
+    [37, 33, 33, 28]
+
+
+    >>> list(running_max([37, 33, 38, 28]))
+    [37, 37, 38, 38]
+
+
+    >>> list(running_median([37, 33, 38, 28]))
+    [37, 35.0, 37, 35.0]
+
+
+    >>> list(running_statistics([37, 33, 38, 28]))
+    [(1, 37, 37, 37, 37.0), (2, 33, 35.0, 37, 35.0), (3, 33, 37, 38, 36.0), (4, 28, 35.0, 38, 34.0)]
+
+
 .. testcode::
     :hide: