From: Peter Bierma Date: Tue, 19 May 2026 17:57:37 +0000 (-0400) Subject: gh-150027: Avoid copying during construction of `frozenset` objects (GH-150028) X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=409fa8e1f3cfaae35fa4d12f6ac6d564c07ec6be;p=thirdparty%2FPython%2Fcpython.git gh-150027: Avoid copying during construction of `frozenset` objects (GH-150028) --- diff --git a/Include/internal/pycore_intrinsics.h b/Include/internal/pycore_intrinsics.h index 39c2a30f6e97..59a7b16073f8 100644 --- a/Include/internal/pycore_intrinsics.h +++ b/Include/internal/pycore_intrinsics.h @@ -18,8 +18,9 @@ #define INTRINSIC_TYPEVARTUPLE 9 #define INTRINSIC_SUBSCRIPT_GENERIC 10 #define INTRINSIC_TYPEALIAS 11 +#define INTRINSIC_BUILD_FROZENSET 12 -#define MAX_INTRINSIC_1 11 +#define MAX_INTRINSIC_1 12 /* Binary Functions: */ diff --git a/Include/internal/pycore_opcode_utils.h b/Include/internal/pycore_opcode_utils.h index 3e2c4ae411c9..b20718344b39 100644 --- a/Include/internal/pycore_opcode_utils.h +++ b/Include/internal/pycore_opcode_utils.h @@ -80,7 +80,8 @@ extern "C" { #define CONSTANT_TRUE 9 #define CONSTANT_FALSE 10 #define CONSTANT_MINUS_ONE 11 -#define NUM_COMMON_CONSTANTS 12 +#define CONSTANT_BUILTIN_FROZENSET 12 +#define NUM_COMMON_CONSTANTS 13 /* Values used in the oparg for RESUME */ #define RESUME_AT_FUNC_START 0 diff --git a/Include/internal/pycore_setobject.h b/Include/internal/pycore_setobject.h index 24d0135ed1ae..92d1a15177f7 100644 --- a/Include/internal/pycore_setobject.h +++ b/Include/internal/pycore_setobject.h @@ -35,6 +35,9 @@ extern void _PySet_ClearInternal(PySetObject *so); PyAPI_FUNC(int) _PySet_AddTakeRef(PySetObject *so, PyObject *key); +PyObject * +_PySet_Freeze(PyObject *set); + #ifdef __cplusplus } #endif diff --git a/Lib/opcode.py b/Lib/opcode.py index 4e60fb5af34f..bb7824da70e8 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -44,7 +44,7 @@ _common_constants = [builtins.AssertionError, builtins.NotImplementedError, builtins.set, # Append-only — must match CONSTANT_* in # Include/internal/pycore_opcode_utils.h. - None, "", True, False, -1] + None, "", True, False, -1, builtins.frozenset] _nb_ops = _opcode.get_nb_ops() hascompare = [opmap["COMPARE_OP"]] diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d62a3a4f17f8..1f52b16948c7 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -268,7 +268,10 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase): def f_set(): return set(2*x for x in [1,2,3]) - funcs = [f_all, f_any, f_tuple, f_list, f_set] + def f_frozenset(): + return frozenset(2*x for x in [1,2,3]) + + funcs = [f_all, f_any, f_tuple, f_list, f_set, f_frozenset] for f in funcs: # check that generator code object is not duplicated @@ -278,35 +281,37 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase): # check the overriding the builtins works - global all, any, tuple, list, set - saved = all, any, tuple, list, set + global all, any, tuple, list, set, frozenset + saved = all, any, tuple, list, set, frozenset try: all = lambda x : "all" any = lambda x : "any" tuple = lambda x : "tuple" list = lambda x : "list" set = lambda x : "set" + frozenset = lambda x : "frozenset" overridden_outputs = [f() for f in funcs] finally: - all, any, tuple, list, set = saved + all, any, tuple, list, set, frozenset = saved - self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set']) + self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set', 'frozenset']) # Now repeat, overriding the builtins module as well - saved = all, any, tuple, list, set + saved = all, any, tuple, list, set, frozenset try: builtins.all = all = lambda x : "all" builtins.any = any = lambda x : "any" builtins.tuple = tuple = lambda x : "tuple" builtins.list = list = lambda x : "list" builtins.set = set = lambda x : "set" + builtins.frozenset = frozenset = lambda x : "frozenset" overridden_outputs = [f() for f in funcs] finally: - all, any, tuple, list, set = saved - builtins.all, builtins.any, builtins.tuple, builtins.list, builtins.set = saved + all, any, tuple, list, set, frozenset = saved + builtins.all, builtins.any, builtins.tuple, builtins.list, builtins.set, builtins.frozenset = saved - self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set']) + self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set', 'frozenset']) def test_builtin_call_async_genexpr_no_crash(self): async def f_all(): diff --git a/Lib/test/test_compiler_codegen.py b/Lib/test/test_compiler_codegen.py index d02937c84d95..36058854a41d 100644 --- a/Lib/test/test_compiler_codegen.py +++ b/Lib/test/test_compiler_codegen.py @@ -161,3 +161,34 @@ class IsolatedCodeGenTests(CodegenTestCase): self.assertIsNone(cm.exception.text) self.assertEqual(cm.exception.offset, 1) self.assertEqual(cm.exception.end_offset, 10) + + def test_frozenset_optimization(self): + l1 = self.Label() + snippet = "frozenset({1, 2, 3})" + expected = [ + ('RESUME', 0), + ('ANNOTATIONS_PLACEHOLDER', None), + ('LOAD_NAME', 0), + ('COPY', 1), + ('LOAD_COMMON_CONSTANT', 12), + ('IS_OP', 0), + ('POP_JUMP_IF_FALSE', l1), + ('POP_TOP', None), + ('LOAD_CONST', 1), + ('LOAD_CONST', 2), + ('LOAD_CONST', 3), + ('BUILD_SET', 3), + ('CALL_INTRINSIC_1', 12), + ('JUMP', 0), + l1, + ('PUSH_NULL', None), + ('LOAD_CONST', 1), + ('LOAD_CONST', 2), + ('LOAD_CONST', 3), + ('BUILD_SET', 3), + ('CALL', 1), + ('POP_TOP', None), + ('LOAD_CONST', 0), + ('RETURN_VALUE', None) + ] + self.codegen_test(snippet, expected) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-18-17-16-51.gh-issue-150027.sJgLvd.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-18-17-16-51.gh-issue-150027.sJgLvd.rst new file mode 100644 index 000000000000..66446105da3f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-18-17-16-51.gh-issue-150027.sJgLvd.rst @@ -0,0 +1,2 @@ +Improve performance of :class:`frozenset` objects by avoiding copies during +construction. diff --git a/Objects/setobject.c b/Objects/setobject.c index 1e6305636045..a1f654f0715b 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1545,6 +1545,16 @@ set_swap_bodies(PySetObject *a, PySetObject *b) FT_ATOMIC_STORE_PTR_RELEASE(b->table, b_table); } +PyObject * +_PySet_Freeze(PyObject *set) +{ + assert(set != NULL); + assert(PySet_CheckExact(set)); + assert(_PyObject_IsUniquelyReferenced(set)); + set->ob_type = &PyFrozenSet_Type; + return Py_NewRef(set); +} + /*[clinic input] @critical_section set.copy diff --git a/Python/codegen.c b/Python/codegen.c index 529c1733598e..205c49cff182 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -3953,22 +3953,45 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end) if (! (func->kind == Name_kind && asdl_seq_LEN(args) == 1 && - asdl_seq_LEN(kwds) == 0 && - asdl_seq_GET(args, 0)->kind == GeneratorExp_kind)) + asdl_seq_LEN(kwds) == 0)) { return 0; } - expr_ty generator_exp = asdl_seq_GET(args, 0); - PySTEntryObject *generator_entry = _PySymtable_Lookup(SYMTABLE(c), (void *)generator_exp); + location loc = LOC(func); + + expr_ty arg_expr = asdl_seq_GET(args, 0); + + if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "frozenset") + && (arg_expr->kind == Set_kind || arg_expr->kind == SetComp_kind)) { + NEW_JUMP_TARGET_LABEL(c, skip_optimization); + + ADDOP_I(c, loc, COPY, 1); + ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, CONSTANT_BUILTIN_FROZENSET); + ADDOP_COMPARE(c, loc, Is); + ADDOP_JUMP(c, loc, POP_JUMP_IF_FALSE, skip_optimization); + ADDOP(c, loc, POP_TOP); + + VISIT(c, expr, arg_expr); + ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_BUILD_FROZENSET); + + ADDOP_JUMP(c, loc, JUMP, end); + + USE_LABEL(c, skip_optimization); + return 1; + } + + if (arg_expr->kind != GeneratorExp_kind) { + return 0; + } + + PySTEntryObject *generator_entry = _PySymtable_Lookup(SYMTABLE(c), (void *)arg_expr); if (generator_entry->ste_coroutine) { Py_DECREF(generator_entry); return 0; } Py_DECREF(generator_entry); - location loc = LOC(func); - int optimized = 0; NEW_JUMP_TARGET_LABEL(c, skip_optimization); @@ -3994,6 +4017,9 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end) else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "set")) { const_oparg = CONSTANT_BUILTIN_SET; } + else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "frozenset")) { + const_oparg = CONSTANT_BUILTIN_FROZENSET; + } if (const_oparg != -1) { ADDOP_I(c, loc, COPY, 1); // the function ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, const_oparg); @@ -4003,10 +4029,10 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end) if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) { ADDOP_I(c, loc, BUILD_LIST, 0); - } else if (const_oparg == CONSTANT_BUILTIN_SET) { + } else if (const_oparg == CONSTANT_BUILTIN_SET || const_oparg == CONSTANT_BUILTIN_FROZENSET) { ADDOP_I(c, loc, BUILD_SET, 0); } - VISIT(c, expr, generator_exp); + VISIT(c, expr, arg_expr); NEW_JUMP_TARGET_LABEL(c, loop); NEW_JUMP_TARGET_LABEL(c, cleanup); @@ -4017,7 +4043,7 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end) if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) { ADDOP_I(c, loc, LIST_APPEND, 3); ADDOP_JUMP(c, loc, JUMP, loop); - } else if (const_oparg == CONSTANT_BUILTIN_SET) { + } else if (const_oparg == CONSTANT_BUILTIN_SET || const_oparg == CONSTANT_BUILTIN_FROZENSET) { ADDOP_I(c, loc, SET_ADD, 3); ADDOP_JUMP(c, loc, JUMP, loop); } @@ -4029,7 +4055,8 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end) ADDOP(c, NO_LOCATION, POP_ITER); if (const_oparg != CONSTANT_BUILTIN_TUPLE && const_oparg != CONSTANT_BUILTIN_LIST && - const_oparg != CONSTANT_BUILTIN_SET) { + const_oparg != CONSTANT_BUILTIN_SET && + const_oparg != CONSTANT_BUILTIN_FROZENSET) { ADDOP_LOAD_CONST(c, loc, initial_res == Py_True ? Py_False : Py_True); } ADDOP_JUMP(c, loc, JUMP, end); @@ -4044,6 +4071,9 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end) } else if (const_oparg == CONSTANT_BUILTIN_SET) { // result is already a set } + else if (const_oparg == CONSTANT_BUILTIN_FROZENSET) { + ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_BUILD_FROZENSET); + } else { ADDOP_LOAD_CONST(c, loc, initial_res); } diff --git a/Python/intrinsics.c b/Python/intrinsics.c index 9f994950f272..f081f33cc83b 100644 --- a/Python/intrinsics.c +++ b/Python/intrinsics.c @@ -9,6 +9,7 @@ #include "pycore_intrinsics.h" // INTRINSIC_PRINT #include "pycore_list.h" // _PyList_AsTupleAndClear() #include "pycore_object.h" // _PyObject_IsUniquelyReferenced() +#include "pycore_setobject.h" // _PySet_Freeze() #include "pycore_pyerrors.h" // _PyErr_SetString() #include "pycore_runtime.h" // _Py_ID() #include "pycore_typevarobject.h" // _Py_make_typevar() @@ -207,6 +208,14 @@ make_typevar(PyThreadState* Py_UNUSED(ignored), PyObject *v) return _Py_make_typevar(v, NULL, NULL); } +static PyObject * +make_frozenset(PyThreadState* Py_UNUSED(ignored), PyObject *set) +{ + assert(PySet_CheckExact(set)); + assert(_PyObject_IsUniquelyReferenced(set)); + return _PySet_Freeze(set); +} + #define INTRINSIC_FUNC_ENTRY(N, F) \ [N] = {F, #N}, @@ -225,6 +234,7 @@ _PyIntrinsics_UnaryFunctions[] = { INTRINSIC_FUNC_ENTRY(INTRINSIC_TYPEVARTUPLE, _Py_make_typevartuple) INTRINSIC_FUNC_ENTRY(INTRINSIC_SUBSCRIPT_GENERIC, _Py_subscript_generic) INTRINSIC_FUNC_ENTRY(INTRINSIC_TYPEALIAS, _Py_make_typealias) + INTRINSIC_FUNC_ENTRY(INTRINSIC_BUILD_FROZENSET, make_frozenset) }; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 46579a45f4cc..cc29a832fc75 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -892,6 +892,7 @@ pycore_init_builtins(PyThreadState *tstate) interp->common_consts[CONSTANT_FALSE] = Py_False; interp->common_consts[CONSTANT_MINUS_ONE] = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS - 1]; + interp->common_consts[CONSTANT_BUILTIN_FROZENSET] = (PyObject *)&PyFrozenSet_Type; for (int i = 0; i < NUM_COMMON_CONSTANTS; i++) { assert(interp->common_consts[i] != NULL); }