From: Raymond Hettinger Date: Fri, 26 Sep 2025 05:04:49 +0000 (-0500) Subject: gh-138682: Add symmetric difference to Counter (gh-138766) X-Git-Tag: v3.15.0a1~201 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7909b304931ddbded5140985fc51999f4b0c7e42;p=thirdparty%2FPython%2Fcpython.git gh-138682: Add symmetric difference to Counter (gh-138766) --- diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index fdd31799bd90..52178d6c526a 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -367,9 +367,11 @@ Several mathematical operations are provided for combining :class:`Counter` objects to produce multisets (counters that have counts greater than zero). Addition and subtraction combine counters by adding or subtracting the counts of corresponding elements. Intersection and union return the minimum and -maximum of corresponding counts. Equality and inclusion compare -corresponding counts. Each operation can accept inputs with signed -counts, but the output will exclude results with counts of zero or less. +maximum of corresponding counts. Symmetric difference returns the difference +between the maximum and minimum of the corresponding counts. Equality and +inclusion compare corresponding counts. Each operation can accept inputs +with signed counts, but the output will exclude results with counts of zero +or below. .. doctest:: @@ -383,6 +385,8 @@ counts, but the output will exclude results with counts of zero or less. Counter({'a': 1, 'b': 1}) >>> c | d # union: max(c[x], d[x]) Counter({'a': 3, 'b': 2}) + >>> c ^ d # max(c[x], d[x]) - min(c[x], d[x]) + Counter({'a': 2, 'b': 1}) >>> c == d # equality: c[x] == d[x] False >>> c <= d # inclusion: c[x] <= d[x] @@ -400,6 +404,9 @@ or subtracting from an empty counter. .. versionadded:: 3.3 Added support for unary plus, unary minus, and in-place multiset operations. +.. versionadded:: next + Added support for the symmetric difference multiset operation, ``c ^ d``. + .. note:: Counters were primarily designed to work with positive integers to represent diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index d5d387d9a0aa..72c55eabb54d 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -294,6 +294,14 @@ New modules Improved modules ================ +collections +----------- + +* Added :meth:`!collections.Counter.__xor__` and + :meth:`!collections.Counter.__ixor__` to compute the symmetric difference + between :class:`~collections.Counter` objects. + (Contributed by Raymond Hettinger in :gh:`138682`.) + collections.abc --------------- diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index b8653f40a942..25ac4d1d524b 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -796,6 +796,7 @@ class Counter(dict): # set(cp - cq) == sp - sq # set(cp | cq) == sp | sq # set(cp & cq) == sp & sq + # set(cp ^ cq) == sp ^ sq def __eq__(self, other): 'True if all counts agree. Missing counts are treated as zero.' @@ -908,6 +909,33 @@ class Counter(dict): result[elem] = newcount return result + def __xor__(self, other): + '''Symmetric difference. Absolute value of count differences. + + The symmetric difference p ^ q is equivalent to: + + (p - q) | (q - p). + + For each element, symmetric difference gives the same result as: + + max(p[elem], q[elem]) - min(p[elem], q[elem]) + + >>> Counter(a=5, b=3, c=2, d=2) ^ Counter(a=1, b=3, c=5, e=1) + Counter({'a': 4, 'c': 3, 'd': 2, 'e': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + newcount = abs(count - other[elem]) + if newcount: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self and count: + result[elem] = abs(count) + return result + def __pos__(self): 'Adds an empty counter, effectively stripping negative and zero counts' result = Counter() @@ -990,6 +1018,22 @@ class Counter(dict): self[elem] = other_count return self._keep_positive() + def __ixor__(self, other): + '''Inplace symmetric difference. Absolute value of count differences. + + >>> c = Counter(a=5, b=3, c=2, d=2) + >>> c ^= Counter(a=1, b=3, c=5, e=1) + >>> c + Counter({'a': 4, 'c': 3, 'd': 2, 'e': 1}) + + ''' + for elem, count in self.items(): + self[elem] = abs(count - other[elem]) + for elem, count in other.items(): + if elem not in self: + self[elem] = abs(count) + return self._keep_positive() + ######################################################################## ### ChainMap diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index 76995c52b1a3..225952392528 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -2179,6 +2179,7 @@ class TestCounter(unittest.TestCase): self.assertTrue(correctly_ordered(p - q)) self.assertTrue(correctly_ordered(p | q)) self.assertTrue(correctly_ordered(p & q)) + self.assertTrue(correctly_ordered(p ^ q)) p, q = Counter(ps), Counter(qs) p += q @@ -2196,6 +2197,10 @@ class TestCounter(unittest.TestCase): p &= q self.assertTrue(correctly_ordered(p)) + p, q = Counter(ps), Counter(qs) + p ^= q + self.assertTrue(correctly_ordered(p)) + p, q = Counter(ps), Counter(qs) p.update(q) self.assertTrue(correctly_ordered(p)) @@ -2278,6 +2283,7 @@ class TestCounter(unittest.TestCase): (Counter.__sub__, lambda x, y: max(0, x-y)), (Counter.__or__, lambda x, y: max(0,x,y)), (Counter.__and__, lambda x, y: max(0, min(x,y))), + (Counter.__xor__, lambda x, y: max(0, max(x,y) - min(x,y))), ]: result = counterop(p, q) for x in elements: @@ -2295,6 +2301,7 @@ class TestCounter(unittest.TestCase): (Counter.__sub__, set.__sub__), (Counter.__or__, set.__or__), (Counter.__and__, set.__and__), + (Counter.__xor__, set.__xor__), ]: counter_result = counterop(p, q) set_result = setop(set(p.elements()), set(q.elements())) @@ -2313,6 +2320,7 @@ class TestCounter(unittest.TestCase): (Counter.__isub__, Counter.__sub__), (Counter.__ior__, Counter.__or__), (Counter.__iand__, Counter.__and__), + (Counter.__ixor__, Counter.__xor__), ]: c = p.copy() c_id = id(c) @@ -2388,6 +2396,7 @@ class TestCounter(unittest.TestCase): self.assertEqual(set(cp - cq), sp - sq) self.assertEqual(set(cp | cq), sp | sq) self.assertEqual(set(cp & cq), sp & sq) + self.assertEqual(set(cp ^ cq), sp ^ sq) self.assertEqual(cp == cq, sp == sq) self.assertEqual(cp != cq, sp != sq) self.assertEqual(cp <= cq, sp <= sq) @@ -2415,6 +2424,40 @@ class TestCounter(unittest.TestCase): self.assertTrue(Counter(a=3, b=2, c=0) > Counter('aab')) self.assertFalse(Counter(a=2, b=1, c=0) > Counter('aab')) + def test_symmetric_difference(self): + population = (-4, -3, -2, -1, 0, 1, 2, 3, 4) + + for a, b1, b2, c in product(population, repeat=4): + p = Counter(a=a, b=b1) + q = Counter(b=b2, c=c) + r = p ^ q + + # Elementwise invariants + for k in ('a', 'b', 'c'): + self.assertEqual(r[k], max(p[k], q[k]) - min(p[k], q[k])) + self.assertEqual(r[k], abs(p[k] - q[k])) + + # Invariant for all positive, negative, and zero counts + self.assertEqual(r, (p - q) | (q - p)) + + # Invariant for non-negative counts + if a >= 0 and b1 >= 0 and b2 >= 0 and c >= 0: + self.assertEqual(r, (p | q) - (p & q)) + + # Zeros and negatives eliminated + self.assertTrue(all(value > 0 for value in r.values())) + + # Output preserves input order: p first and then q + keys = list(p) + list(q) + indices = [keys.index(k) for k in r] + self.assertEqual(indices, sorted(indices)) + + # Inplace operation matches binary operation + pp = Counter(p) + qq = Counter(q) + pp ^= qq + self.assertEqual(pp, r) + def load_tests(loader, tests, pattern): tests.addTest(doctest.DocTestSuite(collections)) diff --git a/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst b/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst new file mode 100644 index 000000000000..fe6bdcd894be --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-10-13-32-25.gh-issue-138682.iExqx1.rst @@ -0,0 +1 @@ +Added symmetric difference support to :class:`collections.Counter` objects.