From 244300162d2e863a0588d1754e224d68931ada37 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 20 May 2026 13:22:57 +0200 Subject: [PATCH] gh-149807: Fix hash(frozendict): compute (key, value) pair hash (#149841) --- Lib/test/test_dict.py | 25 ++++++++++ ...-05-14-19-41-03.gh-issue-149807.IwGaCo.rst | 2 + Objects/dictobject.c | 50 +++++++++++++++---- Objects/tupleobject.c | 3 ++ 4 files changed, 69 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-05-14-19-41-03.gh-issue-149807.IwGaCo.rst diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 4efb066d4fd0..f26586809238 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1903,10 +1903,35 @@ class FrozenDictTests(unittest.TestCase): self.assertEqual(hash(frozendict(x=1, y=2)), hash(frozendict(y=2, x=1))) + # Check that hash() computes the hash of (key, value) pairs + cases = [ + frozendict(a=False, b=True, c=True), + frozendict(a=True, b=False, c=True), + frozendict(a=True, b=True, c=False), + frozendict({False: "a", "b": True, "c": True}), + frozendict({"a": "b", False: True, True: "c"}), + ] + hashes = {hash(fd) for fd in cases} + self.assertEqual(len(hashes), len(cases)) + fd = frozendict(x=[1], y=[2]) with self.assertRaisesRegex(TypeError, "unhashable type: 'list'"): hash(fd) + @support.cpython_only + def test_hash_cpython(self): + # Check that hash(frozendict) implementation is: + # hash(frozenset(fd.items())) + for fd in ( + frozendict(), + frozendict(x=1, y=2), + frozendict(y=2, x=1), + frozendict(a=False, b=True, c=True), + frozendict.fromkeys('abc'), + ): + with self.subTest(fd=fd): + self.assertEqual(hash(fd), hash(frozenset(fd.items()))) + def test_fromkeys(self): self.assertEqual(frozendict.fromkeys('abc'), frozendict(a=None, b=None, c=None)) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-14-19-41-03.gh-issue-149807.IwGaCo.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-14-19-41-03.gh-issue-149807.IwGaCo.rst new file mode 100644 index 000000000000..a94c737e7361 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-14-19-41-03.gh-issue-149807.IwGaCo.rst @@ -0,0 +1,2 @@ +Fix ``hash(frozendict)``: compute the hash of each ``(key, value)`` pair +correctly. Patch by Victor Stinner. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index a7d67812bec9..3830fedd42bd 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -8230,6 +8230,39 @@ _shuffle_bits(Py_uhash_t h) return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; } +// Compute hash((key, value)). +// Code copied from tuple_hash(). +static Py_hash_t +frozendict_pair_hash(Py_hash_t key_hash, PyObject *value) +{ + assert(key_hash != -1); + + const Py_ssize_t len = 2; + Py_uhash_t acc = _PyTuple_HASH_XXPRIME_5; + + Py_uhash_t lane = key_hash; + acc += lane * _PyTuple_HASH_XXPRIME_2; + acc = _PyTuple_HASH_XXROTATE(acc); + acc *= _PyTuple_HASH_XXPRIME_1; + + lane = PyObject_Hash(value); + if (lane == (Py_uhash_t)-1) { + return -1; + } + acc += lane * _PyTuple_HASH_XXPRIME_2; + acc = _PyTuple_HASH_XXROTATE(acc); + acc *= _PyTuple_HASH_XXPRIME_1; + + /* Add input length, mangled to keep the historical value of hash(()). */ + acc += len ^ (_PyTuple_HASH_XXPRIME_5 ^ 3527539UL); + + if (acc == (Py_uhash_t)-1) { + acc = 1546275796; + } + return acc; +} + + // Code copied from frozenset_hash() static Py_hash_t frozendict_hash(PyObject *op) @@ -8243,20 +8276,15 @@ frozendict_hash(PyObject *op) PyDictObject *mp = _PyAnyDict_CAST(op); Py_uhash_t hash = 0; - PyObject *key, *value; // borrowed refs + PyObject *value; // borrowed ref Py_ssize_t pos = 0; - while (PyDict_Next(op, &pos, &key, &value)) { - Py_hash_t key_hash = PyObject_Hash(key); - if (key_hash == -1) { - return -1; - } - hash ^= _shuffle_bits(key_hash); - - Py_hash_t value_hash = PyObject_Hash(value); - if (value_hash == -1) { + Py_hash_t key_hash; + while (_PyDict_Next(op, &pos, NULL, &value, &key_hash)) { + Py_hash_t pair_hash = frozendict_pair_hash(key_hash, value); + if (pair_hash == -1) { return -1; } - hash ^= _shuffle_bits(value_hash); + hash ^= _shuffle_bits(pair_hash); } /* Factor in the number of active entries */ diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 942300024275..6120e70f3eee 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -363,6 +363,9 @@ error: https://github.com/Cyan4973/xxHash/blob/master/doc/xxhash_spec.md The constants for the hash function are defined in pycore_tuple.h. + + If you update this code, update also frozendict_pair_hash() which copied + this code. */ static Py_hash_t -- 2.47.3