git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-150027: Avoid copying during construction of `frozenset` objects (GH-150028)
author Peter Bierma <zintensitydev@gmail.com>
Tue, 19 May 2026 17:57:37 +0000 (13:57 -0400)
committer GitHub <noreply@github.com>
Tue, 19 May 2026 17:57:37 +0000 (13:57 -0400)
Include/internal/pycore_intrinsics.h
Include/internal/pycore_opcode_utils.h
Include/internal/pycore_setobject.h
Lib/opcode.py
Lib/test/test_builtin.py
Lib/test/test_compiler_codegen.py
Misc/NEWS.d/next/Core_and_Builtins/2026-05-18-17-16-51.gh-issue-150027.sJgLvd.rst [new file with mode: 0644]
Objects/setobject.c
Python/codegen.c
Python/intrinsics.c
Python/pylifecycle.c

index 39c2a30f6e979de3e4521393c53b198a07a585ab..59a7b16073f886cf16a79f66604fdb999712f133 100644 (file)
@@ -18,8 +18,9 @@
 #define INTRINSIC_TYPEVARTUPLE                   9
 #define INTRINSIC_SUBSCRIPT_GENERIC             10
 #define INTRINSIC_TYPEALIAS                     11
+#define INTRINSIC_BUILD_FROZENSET               12
 
-#define MAX_INTRINSIC_1                         11
+#define MAX_INTRINSIC_1                         12
 
 
 /* Binary Functions: */
index 3e2c4ae411c925aeb27956e1959786fdee35de89..b20718344b39981449a8d1e766f09586a33821fc 100644 (file)
@@ -80,7 +80,8 @@ extern "C" {
 #define CONSTANT_TRUE 9
 #define CONSTANT_FALSE 10
 #define CONSTANT_MINUS_ONE 11
-#define NUM_COMMON_CONSTANTS 12
+#define CONSTANT_BUILTIN_FROZENSET 12
+#define NUM_COMMON_CONSTANTS 13
 
 /* Values used in the oparg for RESUME */
 #define RESUME_AT_FUNC_START 0
index 24d0135ed1aeca8228e6ef15acc2dfbdb2a21498..92d1a15177f79ebcf5261b465a39fcf453d1d899 100644 (file)
@@ -35,6 +35,9 @@ extern void _PySet_ClearInternal(PySetObject *so);
 
 PyAPI_FUNC(int) _PySet_AddTakeRef(PySetObject *so, PyObject *key);
 
+PyObject *
+_PySet_Freeze(PyObject *set);  // defined in Objects/setobject.c; called from Python/intrinsics.c
+
 #ifdef __cplusplus
 }
 #endif
index 4e60fb5af34f220a15443d8b250bb72b41ef3fd8..bb7824da70e8e5a06e07410c70952e650ce6716d 100644 (file)
@@ -44,7 +44,7 @@ _common_constants = [builtins.AssertionError, builtins.NotImplementedError,
                      builtins.set,
                      # Append-only — must match CONSTANT_* in
                      # Include/internal/pycore_opcode_utils.h.
-                     None, "", True, False, -1]
+                     None, "", True, False, -1, builtins.frozenset]
 _nb_ops = _opcode.get_nb_ops()
 
 hascompare = [opmap["COMPARE_OP"]]
index d62a3a4f17f85e3574a3685b7cd1e28b7fdc80bd..1f52b16948c7038d884f9c9ec9b7bfab405f73e9 100644 (file)
@@ -268,7 +268,10 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
         def f_set():
             return set(2*x for x in [1,2,3])
 
-        funcs = [f_all, f_any, f_tuple, f_list, f_set]
+        def f_frozenset():
+            return frozenset(2*x for x in [1,2,3])
+
+        funcs = [f_all, f_any, f_tuple, f_list, f_set, f_frozenset]
 
         for f in funcs:
             # check that generator code object is not duplicated
@@ -278,35 +281,37 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
 
         # check the overriding the builtins works
 
-        global all, any, tuple, list, set
-        saved = all, any, tuple, list, set
+        global all, any, tuple, list, set, frozenset
+        saved = all, any, tuple, list, set, frozenset
         try:
             all = lambda x : "all"
             any = lambda x : "any"
             tuple = lambda x : "tuple"
             list = lambda x : "list"
             set = lambda x : "set"
+            frozenset = lambda x : "frozenset"
 
             overridden_outputs = [f() for f in funcs]
         finally:
-            all, any, tuple, list, set = saved
+            all, any, tuple, list, set, frozenset = saved
 
-        self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
+        self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set', 'frozenset'])
         # Now repeat, overriding the builtins module as well
-        saved = all, any, tuple, list, set
+        saved = all, any, tuple, list, set, frozenset
         try:
             builtins.all = all = lambda x : "all"
             builtins.any = any = lambda x : "any"
             builtins.tuple = tuple = lambda x : "tuple"
             builtins.list = list = lambda x : "list"
             builtins.set = set = lambda x : "set"
+            builtins.frozenset = frozenset = lambda x : "frozenset"
 
             overridden_outputs = [f() for f in funcs]
         finally:
-            all, any, tuple, list, set = saved
-            builtins.all, builtins.any, builtins.tuple, builtins.list, builtins.set = saved
+            all, any, tuple, list, set, frozenset = saved
+            builtins.all, builtins.any, builtins.tuple, builtins.list, builtins.set, builtins.frozenset = saved
 
-        self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
+        self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set', 'frozenset'])
 
     def test_builtin_call_async_genexpr_no_crash(self):
         async def f_all():
index d02937c84d953495522bbf7869e0be9e015ed8b0..36058854a41d63e2e49bfa5bc84f32722505eb66 100644 (file)
@@ -161,3 +161,34 @@ class IsolatedCodeGenTests(CodegenTestCase):
         self.assertIsNone(cm.exception.text)
         self.assertEqual(cm.exception.offset, 1)
         self.assertEqual(cm.exception.end_offset, 10)
+
+    def test_frozenset_optimization(self):  # frozenset(<set display>) compiles to a guarded fast path
+        l1 = self.Label()  # start of the ordinary-call fallback
+        snippet = "frozenset({1, 2, 3})"
+        expected = [
+            ('RESUME', 0),
+            ('ANNOTATIONS_PLACEHOLDER', None),
+            ('LOAD_NAME', 0),  # the callee name from the snippet
+            ('COPY', 1),
+            ('LOAD_COMMON_CONSTANT', 12),  # 12 == CONSTANT_BUILTIN_FROZENSET
+            ('IS_OP', 0),
+            ('POP_JUMP_IF_FALSE', l1),  # callee is not the builtin frozenset: use the fallback
+            ('POP_TOP', None),  # fast path: discard the callee
+            ('LOAD_CONST', 1),
+            ('LOAD_CONST', 2),
+            ('LOAD_CONST', 3),
+            ('BUILD_SET', 3),
+            ('CALL_INTRINSIC_1', 12),  # 12 == INTRINSIC_BUILD_FROZENSET
+            ('JUMP', 0),  # NOTE(review): codegen jumps to the call's `end` label; target is a literal 0 here, not a Label - confirm intended
+            l1,
+            ('PUSH_NULL', None),  # fallback: regular call with the set as the single argument
+            ('LOAD_CONST', 1),
+            ('LOAD_CONST', 2),
+            ('LOAD_CONST', 3),
+            ('BUILD_SET', 3),
+            ('CALL', 1),
+            ('POP_TOP', None),
+            ('LOAD_CONST', 0),
+            ('RETURN_VALUE', None)
+        ]
+        self.codegen_test(snippet, expected)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-18-17-16-51.gh-issue-150027.sJgLvd.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-18-17-16-51.gh-issue-150027.sJgLvd.rst
new file mode 100644 (file)
index 0000000..6644610
--- /dev/null
@@ -0,0 +1,2 @@
+Improve performance of constructing :class:`frozenset` objects from set
+displays, set comprehensions, and generator expressions by avoiding a copy.
index 1e63056360455296f46b99623c5725a18c516ed1..a1f654f0715bf39f1dce3ac1115ad92eba460764 100644 (file)
@@ -1545,6 +1545,16 @@ set_swap_bodies(PySetObject *a, PySetObject *b)
     FT_ATOMIC_STORE_PTR_RELEASE(b->table, b_table);
 }
 
+PyObject *
+_PySet_Freeze(PyObject *set)  // turn `set` into a frozenset in place (no copy); returns a new reference
+{
+    assert(set != NULL);
+    assert(PySet_CheckExact(set));  // exact `set` instances only, not subclasses
+    assert(_PyObject_IsUniquelyReferenced(set));  // presumably so no other owner can observe the type change - confirm
+    set->ob_type = &PyFrozenSet_Type;  // retag the existing object; NOTE(review): Py_SET_TYPE() is the documented setter
+    return Py_NewRef(set);  // same object, new strong reference for the caller
+}
+
 /*[clinic input]
 @critical_section
 set.copy
index 529c1733598e38119ceaa3991beecee8326e2cca..205c49cff1827c49b38b72e9696cee76a929f9a7 100644 (file)
@@ -3953,22 +3953,45 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
 
     if (! (func->kind == Name_kind &&
            asdl_seq_LEN(args) == 1 &&
-           asdl_seq_LEN(kwds) == 0 &&
-           asdl_seq_GET(args, 0)->kind == GeneratorExp_kind))
+           asdl_seq_LEN(kwds) == 0))
     {
         return 0;
     }
 
-    expr_ty generator_exp = asdl_seq_GET(args, 0);
-    PySTEntryObject *generator_entry = _PySymtable_Lookup(SYMTABLE(c), (void *)generator_exp);
+    location loc = LOC(func);
+
+    expr_ty arg_expr = asdl_seq_GET(args, 0);
+
+    if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "frozenset")
+        && (arg_expr->kind == Set_kind || arg_expr->kind == SetComp_kind)) {
+        NEW_JUMP_TARGET_LABEL(c, skip_optimization);
+
+        ADDOP_I(c, loc, COPY, 1);
+        ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, CONSTANT_BUILTIN_FROZENSET);
+        ADDOP_COMPARE(c, loc, Is);
+        ADDOP_JUMP(c, loc, POP_JUMP_IF_FALSE, skip_optimization);
+        ADDOP(c, loc, POP_TOP);
+
+        VISIT(c, expr, arg_expr);
+        ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_BUILD_FROZENSET);
+
+        ADDOP_JUMP(c, loc, JUMP, end);
+
+        USE_LABEL(c, skip_optimization);
+        return 1;
+    }
+
+    if (arg_expr->kind != GeneratorExp_kind) {
+        return 0;
+    }
+
+    PySTEntryObject *generator_entry = _PySymtable_Lookup(SYMTABLE(c), (void *)arg_expr);
     if (generator_entry->ste_coroutine) {
         Py_DECREF(generator_entry);
         return 0;
     }
     Py_DECREF(generator_entry);
 
-    location loc = LOC(func);
-
     int optimized = 0;
     NEW_JUMP_TARGET_LABEL(c, skip_optimization);
 
@@ -3994,6 +4017,9 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
     else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "set")) {
         const_oparg = CONSTANT_BUILTIN_SET;
     }
+    else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "frozenset")) {
+        const_oparg = CONSTANT_BUILTIN_FROZENSET;
+    }
     if (const_oparg != -1) {
         ADDOP_I(c, loc, COPY, 1); // the function
         ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, const_oparg);
@@ -4003,10 +4029,10 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
 
         if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
             ADDOP_I(c, loc, BUILD_LIST, 0);
-        } else if (const_oparg == CONSTANT_BUILTIN_SET) {
+        } else if (const_oparg == CONSTANT_BUILTIN_SET || const_oparg == CONSTANT_BUILTIN_FROZENSET) {
             ADDOP_I(c, loc, BUILD_SET, 0);
         }
-        VISIT(c, expr, generator_exp);
+        VISIT(c, expr, arg_expr);
 
         NEW_JUMP_TARGET_LABEL(c, loop);
         NEW_JUMP_TARGET_LABEL(c, cleanup);
@@ -4017,7 +4043,7 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
         if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
             ADDOP_I(c, loc, LIST_APPEND, 3);
             ADDOP_JUMP(c, loc, JUMP, loop);
-        } else if (const_oparg == CONSTANT_BUILTIN_SET) {
+        } else if (const_oparg == CONSTANT_BUILTIN_SET || const_oparg == CONSTANT_BUILTIN_FROZENSET) {
             ADDOP_I(c, loc, SET_ADD, 3);
             ADDOP_JUMP(c, loc, JUMP, loop);
         }
@@ -4029,7 +4055,8 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
         ADDOP(c, NO_LOCATION, POP_ITER);
         if (const_oparg != CONSTANT_BUILTIN_TUPLE &&
             const_oparg != CONSTANT_BUILTIN_LIST &&
-            const_oparg != CONSTANT_BUILTIN_SET) {
+            const_oparg != CONSTANT_BUILTIN_SET &&
+            const_oparg != CONSTANT_BUILTIN_FROZENSET) {
             ADDOP_LOAD_CONST(c, loc, initial_res == Py_True ? Py_False : Py_True);
         }
         ADDOP_JUMP(c, loc, JUMP, end);
@@ -4044,6 +4071,9 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
         } else if (const_oparg == CONSTANT_BUILTIN_SET) {
             // result is already a set
         }
+        else if (const_oparg == CONSTANT_BUILTIN_FROZENSET) {
+            ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_BUILD_FROZENSET);
+        }
         else {
             ADDOP_LOAD_CONST(c, loc, initial_res);
         }
index 9f994950f2721db7312ec39d4b6dd3b6e5a4a526..f081f33cc83b88c8a0cb76801f94377656edf7e7 100644 (file)
@@ -9,6 +9,7 @@
 #include "pycore_intrinsics.h"    // INTRINSIC_PRINT
 #include "pycore_list.h"          // _PyList_AsTupleAndClear()
 #include "pycore_object.h"        // _PyObject_IsUniquelyReferenced()
+#include "pycore_setobject.h"     // _PySet_Freeze()
 #include "pycore_pyerrors.h"      // _PyErr_SetString()
 #include "pycore_runtime.h"       // _Py_ID()
 #include "pycore_typevarobject.h" // _Py_make_typevar()
@@ -207,6 +208,14 @@ make_typevar(PyThreadState* Py_UNUSED(ignored), PyObject *v)
     return _Py_make_typevar(v, NULL, NULL);
 }
 
+static PyObject *
+make_frozenset(PyThreadState* Py_UNUSED(ignored), PyObject *set)  // unary intrinsic registered as INTRINSIC_BUILD_FROZENSET
+{
+    assert(PySet_CheckExact(set));  // same preconditions that _PySet_Freeze() asserts
+    assert(_PyObject_IsUniquelyReferenced(set));
+    return _PySet_Freeze(set);  // result is the same object, now typed as frozenset
+}
+
 
 #define INTRINSIC_FUNC_ENTRY(N, F) \
     [N] = {F, #N},
@@ -225,6 +234,7 @@ _PyIntrinsics_UnaryFunctions[] = {
     INTRINSIC_FUNC_ENTRY(INTRINSIC_TYPEVARTUPLE, _Py_make_typevartuple)
     INTRINSIC_FUNC_ENTRY(INTRINSIC_SUBSCRIPT_GENERIC, _Py_subscript_generic)
     INTRINSIC_FUNC_ENTRY(INTRINSIC_TYPEALIAS, _Py_make_typealias)
+    INTRINSIC_FUNC_ENTRY(INTRINSIC_BUILD_FROZENSET, make_frozenset)
 };
 
 
index 46579a45f4cc39733914b7741af665ebb12cab9a..cc29a832fc754921fcea686c9f0bce10e7b6904c 100644 (file)
@@ -892,6 +892,7 @@ pycore_init_builtins(PyThreadState *tstate)
     interp->common_consts[CONSTANT_FALSE] = Py_False;
     interp->common_consts[CONSTANT_MINUS_ONE] =
         (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS - 1];
+    interp->common_consts[CONSTANT_BUILTIN_FROZENSET] = (PyObject *)&PyFrozenSet_Type;
     for (int i = 0; i < NUM_COMMON_CONSTANTS; i++) {
         assert(interp->common_consts[i] != NULL);
     }