From b16886528ee43ad8618e81b833be022710c4a8d2 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 22 Apr 2026 11:52:41 -0500 Subject: [PATCH] Additional itertool recipes for running statistics (gh-148879) --- Doc/library/itertools.rst | 75 ++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 9 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 5a0ac60ab7d2..06f8bf2a8b6f 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -833,6 +833,7 @@ and :term:`generators ` which incur interpreter overhead. from collections import Counter, deque from contextlib import suppress from functools import reduce + from heapq import heappush, heappushpop, heappush_max, heappushpop_max from math import comb, isqrt, prod, sumprod from operator import getitem, is_not, itemgetter, mul, neg, truediv @@ -848,11 +849,6 @@ and :term:`generators ` which incur interpreter overhead. # prepend(1, [2, 3, 4]) → 1 2 3 4 return chain([value], iterable) - def running_mean(iterable): - "Yield the average of all values seen so far." - # running_mean([8.5, 9.5, 7.5, 6.5]) → 8.5 9.0 8.5 8.0 - return map(truediv, accumulate(iterable), count(1)) - def repeatfunc(function, times=None, *args): "Repeat calls to a function with specified arguments." if times is None: @@ -1150,6 +1146,49 @@ and :term:`generators ` which incur interpreter overhead. return n + # ==== Running statistics ==== + + def running_mean(iterable): + "Average of values seen so far." + # running_mean([37, 33, 38, 28]) → 37 35 36 34 + return map(truediv, accumulate(iterable), count(1)) + + def running_min(iterable): + "Smallest of values seen so far." + # running_min([37, 33, 38, 28]) → 37 33 33 28 + return accumulate(iterable, func=min) + + def running_max(iterable): + "Largest of values seen so far." + # running_max([37, 33, 38, 28]) → 37 37 38 38 + return accumulate(iterable, func=max) + + def running_median(iterable): + "Median of values seen so far." + # running_median([37, 33, 38, 28]) → 37 35 37 35 + read = iter(iterable).__next__ + lo = [] # max-heap + hi = [] # min-heap the same size as or one smaller than lo + with suppress(StopIteration): + while True: + heappush_max(lo, heappushpop(hi, read())) + yield lo[0] + heappush(hi, heappushpop_max(lo, read())) + yield (lo[0] + hi[0]) / 2 + + def running_statistics(iterable): + "Aggregate statistics for values seen so far." + # Generate tuples: (size, minimum, median, maximum, mean) + t0, t1, t2, t3 = tee(iterable, 4) + return zip( + count(1), + running_min(t0), + running_median(t1), + running_max(t2), + running_mean(t3), + ) + + .. doctest:: :hide: @@ -1226,10 +1265,6 @@ and :term:`generators ` which incur interpreter overhead. [(0, 'a'), (1, 'b'), (2, 'c')] - >>> list(running_mean([8.5, 9.5, 7.5, 6.5])) - [8.5, 9.0, 8.5, 8.0] - - >>> for _ in loops(5): ... print('hi') ... @@ -1789,6 +1824,28 @@ and :term:`generators ` which incur interpreter overhead. True + >>> list(running_mean([8.5, 9.5, 7.5, 6.5])) + [8.5, 9.0, 8.5, 8.0] + >>> list(running_mean([37, 33, 38, 28])) + [37.0, 35.0, 36.0, 34.0] + + + >>> list(running_min([37, 33, 38, 28])) + [37, 33, 33, 28] + + + >>> list(running_max([37, 33, 38, 28])) + [37, 37, 38, 38] + + + >>> list(running_median([37, 33, 38, 28])) + [37, 35.0, 37, 35.0] + + + >>> list(running_statistics([37, 33, 38, 28])) + [(1, 37, 37, 37, 37.0), (2, 33, 35.0, 37, 35.0), (3, 33, 37, 38, 36.0), (4, 28, 35.0, 38, 34.0)] + + .. testcode:: :hide: -- 2.47.3