From: Raymond Hettinger Date: Fri, 2 Jan 2009 21:20:38 +0000 (+0000) Subject: Issue #4615. Document how to use itertools for de-duping. X-Git-Tag: v2.6.2c1~296 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d84616535c8084e2c83dccd9418227de7ab9bfa9;p=thirdparty%2FPython%2Fcpython.git Issue #4615. Document how to use itertools for de-duping. --- diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index db38e697d711..348c26488078 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -687,3 +687,27 @@ which incur interpreter overhead. return indices[i:] = [indices[i] + 1] * (r - i) yield tuple(pool[i] for i in indices) + + def unique_everseen(iterable, key=None): + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen = set() + seen_add = seen.add + if key is None: + for element in iterable: + if element not in seen: + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element + + def unique_justseen(iterable, key=None): + "List unique elements, preserving order. Remember only the element just seen." + # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B + # unique_justseen('ABBCcAD', str.lower) --> A B C A D + return imap(next, imap(itemgetter(1), groupby(iterable, key))) diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 6912ac742157..029498aa7867 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1277,6 +1277,30 @@ Samuele ... indices[i:] = [indices[i] + 1] * (r - i) ... yield tuple(pool[i] for i in indices) +>>> def unique_everseen(iterable, key=None): +... "List unique elements, preserving order. Remember all elements ever seen." +... # unique_everseen('AAAABBBCCDAABBB') --> A B C D +... # unique_everseen('ABBCcAD', str.lower) --> A B C D +... seen = set() +... seen_add = seen.add +... if key is None: +... for element in iterable: +... if element not in seen: +... seen_add(element) +... yield element +... else: +... for element in iterable: +... k = key(element) +... if k not in seen: +... seen_add(k) +... yield element + +>>> def unique_justseen(iterable, key=None): +... "List unique elements, preserving order. Remember only the element just seen." +... # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B +... # unique_justseen('ABBCcAD', str.lower) --> A B C A D +... return imap(next, imap(itemgetter(1), groupby(iterable, key))) + This is not part of the examples but it tests to make sure the definitions perform as purported. @@ -1339,6 +1363,18 @@ perform as purported. >>> list(combinations_with_replacement('abc', 2)) [('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')] +>>> list(unique_everseen('AAAABBBCCDAABBB')) +['A', 'B', 'C', 'D'] + +>>> list(unique_everseen('ABBCcAD', str.lower)) +['A', 'B', 'C', 'D'] + +>>> list(unique_justseen('AAAABBBCCDAABBB')) +['A', 'B', 'C', 'D', 'A', 'B'] + +>>> list(unique_justseen('ABBCcAD', str.lower)) +['A', 'B', 'C', 'A', 'D'] + """ __test__ = {'libreftest' : libreftest}