]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-132762: Fix underallocation bug in `dict.fromkeys()`(gh-133627) (gh-133686)
authorSam Gross <colesbury@gmail.com>
Thu, 8 May 2025 17:40:05 +0000 (13:40 -0400)
committerGitHub <noreply@github.com>
Thu, 8 May 2025 17:40:05 +0000 (17:40 +0000)
The function `dict_set_fromkeys()` adds elements of a set to an existing
dictionary. The size of the expanded dictionary was estimated with
`PySet_GET_SIZE(iterable)`, which did not take into account the size of the
existing dictionary.
(cherry picked from commit 421ba589d02b53131f793889d221ef3b1f1410a4)

Co-authored-by: Angela Liss <59097311+angela-tarantula@users.noreply.github.com>
Lib/test/test_dict.py
Misc/NEWS.d/next/Core and Builtins/2025-05-08-13-48-02.gh-issue-132762.tKbygC.rst [new file with mode: 0644]
Objects/dictobject.c

index 4030716efb51f9cc893910e8963c4a6b231f8cce..587099a6f606635720e1eba897fe968722f01970 100644 (file)
@@ -312,17 +312,34 @@ class DictTest(unittest.TestCase):
         self.assertRaises(Exc, baddict2.fromkeys, [1])
 
         # test fast path for dictionary inputs
+        res = dict(zip(range(6), [0]*6))
         d = dict(zip(range(6), range(6)))
-        self.assertEqual(dict.fromkeys(d, 0), dict(zip(range(6), [0]*6)))
-
+        self.assertEqual(dict.fromkeys(d, 0), res)
+        # test fast path for set inputs
+        d = set(range(6))
+        self.assertEqual(dict.fromkeys(d, 0), res)
+        # test slow path for other iterable inputs
+        d = list(range(6))
+        self.assertEqual(dict.fromkeys(d, 0), res)
+
+        # test fast path when object's constructor returns large non-empty dict
         class baddict3(dict):
             def __new__(cls):
                 return d
-        d = {i : i for i in range(10)}
+        d = {i : i for i in range(1000)}
         res = d.copy()
         res.update(a=None, b=None, c=None)
         self.assertEqual(baddict3.fromkeys({"a", "b", "c"}), res)
 
+        # test slow path when object is a proper subclass of dict
+        class baddict4(dict):
+            def __init__(self):
+                dict.__init__(self, d)
+        d = {i : i for i in range(1000)}
+        res = d.copy()
+        res.update(a=None, b=None, c=None)
+        self.assertEqual(baddict4.fromkeys({"a", "b", "c"}), res)
+
     def test_copy(self):
         d = {1: 1, 2: 2, 3: 3}
         self.assertIsNot(d.copy(), d)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2025-05-08-13-48-02.gh-issue-132762.tKbygC.rst b/Misc/NEWS.d/next/Core and Builtins/2025-05-08-13-48-02.gh-issue-132762.tKbygC.rst
new file mode 100644 (file)
index 0000000..80b830e
--- /dev/null
@@ -0,0 +1 @@
+:meth:`~dict.fromkeys` no longer loops forever when adding a small set of keys to a large base dict. Patch by Angela Liss.
index 680d6beb76057182dc0929d412662cce855ffa9e..355986248bac2c566b29ce8aabbf502adbe85a0d 100644 (file)
@@ -3057,9 +3057,10 @@ dict_set_fromkeys(PyInterpreterState *interp, PyDictObject *mp,
     Py_ssize_t pos = 0;
     PyObject *key;
     Py_hash_t hash;
-
-    if (dictresize(interp, mp,
-                    estimate_log2_keysize(PySet_GET_SIZE(iterable)), 0)) {
+    uint8_t new_size = Py_MAX(
+        estimate_log2_keysize(PySet_GET_SIZE(iterable)),
+        DK_LOG_SIZE(mp->ma_keys));
+    if (dictresize(interp, mp, new_size, 0)) {
         Py_DECREF(mp);
         return NULL;
     }