git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-142863: optimize `list` and `set` calls with generator expressions (#142864)
author: Kumar Aditya <kumaraditya@python.org>
Tue, 6 Jan 2026 17:53:28 +0000 (23:23 +0530)
committer: GitHub <noreply@github.com>
Tue, 6 Jan 2026 17:53:28 +0000 (23:23 +0530)
Include/internal/pycore_magic_number.h
Include/internal/pycore_opcode_utils.h
Lib/opcode.py
Lib/test/test_builtin.py
Lib/test/test_dis.py
Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-10-12-09.gh-issue-142863.ZW2ZF8.rst [new file with mode: 0644]
Python/codegen.c
Python/pylifecycle.c

index 09954856204a10a87d7bcb4e914dba8076be0cb2..0724da99b45ba6fb704f71676b0c0d74febeef1d 100644 (file)
@@ -288,6 +288,7 @@ Known values:
     Python 3.15a1 3655 (Fix miscompilation of some module-level annotations)
     Python 3.15a2 3656 (Add TRACE_RECORD instruction, for platforms with switch based interpreter)
     Python 3.15a4 3657 (Add BINARY_OP_SUBSCR_USTR_INT)
+    Python 3.15a4 3658 (Optimize bytecode for list/set called on genexp)
 
 
     Python 3.16 will start with 3700
@@ -301,7 +302,7 @@ PC/launcher.c must also be updated.
 
 */
 
-#define PYC_MAGIC_NUMBER 3657
+#define PYC_MAGIC_NUMBER 3658
 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes
    (little-endian) and then appending b'\r\n'. */
 #define PYC_MAGIC_NUMBER_TOKEN \
index 79a1a242556a52be650e6104198108fff5d1a2ef..e4d859fcc47d02242ef11d62d9822b1773efa90d 100644 (file)
@@ -73,7 +73,9 @@ extern "C" {
 #define CONSTANT_BUILTIN_TUPLE 2
 #define CONSTANT_BUILTIN_ALL 3
 #define CONSTANT_BUILTIN_ANY 4
-#define NUM_COMMON_CONSTANTS 5
+#define CONSTANT_BUILTIN_LIST 5
+#define CONSTANT_BUILTIN_SET 6
+#define NUM_COMMON_CONSTANTS 7
 
 /* Values used in the oparg for RESUME */
 #define RESUME_AT_FUNC_START 0
index 0e9520b68324997ea1061cfaade68554d4b5a6f4..d8374c45481a944b915ba3f8a02b94b0b99596a6 100644 (file)
@@ -40,7 +40,8 @@ _intrinsic_1_descs = _opcode.get_intrinsic1_descs()
 _intrinsic_2_descs = _opcode.get_intrinsic2_descs()
 _special_method_names = _opcode.get_special_method_names()
 _common_constants = [builtins.AssertionError, builtins.NotImplementedError,
-                     builtins.tuple, builtins.all, builtins.any]
+                     builtins.tuple, builtins.all, builtins.any, builtins.list,
+                     builtins.set]
 _nb_ops = _opcode.get_nb_ops()
 
 hascompare = [opmap["COMPARE_OP"]]
index ce60a5d095dd52adb8b36f476c3b0e788abd9de6..7b69374b1868d15e59931528fde5620e95a8c5ce 100644 (file)
@@ -246,7 +246,7 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
         S = [10, 20, 30]
         self.assertEqual(any(x > 42 for x in S), False)
 
-    def test_all_any_tuple_optimization(self):
+    def test_all_any_tuple_list_set_optimization(self):
         def f_all():
             return all(x-2 for x in [1,2,3])
 
@@ -256,7 +256,13 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
         def f_tuple():
             return tuple(2*x for x in [1,2,3])
 
-        funcs = [f_all, f_any, f_tuple]
+        def f_list():
+            return list(2*x for x in [1,2,3])
+
+        def f_set():
+            return set(2*x for x in [1,2,3])
+
+        funcs = [f_all, f_any, f_tuple, f_list, f_set]
 
         for f in funcs:
             # check that generator code object is not duplicated
@@ -266,33 +272,35 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
 
         # check the overriding the builtins works
 
-        global all, any, tuple
-        saved = all, any, tuple
+        global all, any, tuple, list, set
+        saved = all, any, tuple, list, set
         try:
             all = lambda x : "all"
             any = lambda x : "any"
             tuple = lambda x : "tuple"
+            list = lambda x : "list"
+            set = lambda x : "set"
 
             overridden_outputs = [f() for f in funcs]
         finally:
-            all, any, tuple = saved
-
-        self.assertEqual(overridden_outputs, ['all', 'any', 'tuple'])
+            all, any, tuple, list, set = saved
 
+        self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
         # Now repeat, overriding the builtins module as well
-        saved = all, any, tuple
+        saved = all, any, tuple, list, set
         try:
             builtins.all = all = lambda x : "all"
             builtins.any = any = lambda x : "any"
             builtins.tuple = tuple = lambda x : "tuple"
+            builtins.list = list = lambda x : "list"
+            builtins.set = set = lambda x : "set"
 
             overridden_outputs = [f() for f in funcs]
         finally:
-            all, any, tuple = saved
-            builtins.all, builtins.any, builtins.tuple = saved
-
-        self.assertEqual(overridden_outputs, ['all', 'any', 'tuple'])
+            all, any, tuple, list, set = saved
+            builtins.all, builtins.any, builtins.tuple, builtins.list, builtins.set = saved
 
+        self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
 
     def test_ascii(self):
         self.assertEqual(ascii(''), '\'\'')
index 3e7477487200d09777989853c7c8beadf37d936c..902bcdd196ec93041ca0aa366b225a97f2893b93 100644 (file)
@@ -827,7 +827,14 @@ Disassembly of <code object foo at 0x..., file "%s", line %d>:
 
 %4d           RESUME                   0
 
-%4d           LOAD_GLOBAL              1 (list + NULL)
+%4d           LOAD_GLOBAL              0 (list)
+               COPY                     1
+               LOAD_COMMON_CONSTANT     5 (list)
+               IS_OP                    0 (is)
+               POP_JUMP_IF_FALSE       22 (to L3)
+               NOT_TAKEN
+               POP_TOP
+               BUILD_LIST               0
                LOAD_FAST_BORROW         0 (x)
                BUILD_TUPLE              1
                LOAD_CONST               %d (<code object <genexpr> at 0x..., file "%s", line %d>)
@@ -835,6 +842,21 @@ Disassembly of <code object foo at 0x..., file "%s", line %d>:
                SET_FUNCTION_ATTRIBUTE   8 (closure)
                LOAD_DEREF               1 (y)
                CALL                     0
+               PUSH_NULL
+       L1:     FOR_ITER                 3 (to L2)
+               LIST_APPEND              3
+               JUMP_BACKWARD            5 (to L1)
+       L2:     END_FOR
+               POP_ITER
+               RETURN_VALUE
+       L3:     PUSH_NULL
+               LOAD_FAST_BORROW         0 (x)
+               BUILD_TUPLE              1
+               LOAD_CONST               1 (<code object <genexpr> at 0x..., file "%s", line %d>)
+               MAKE_FUNCTION
+               SET_FUNCTION_ATTRIBUTE   8 (closure)
+               LOAD_DEREF               1 (y)
+               CALL                     0
                CALL                     1
                RETURN_VALUE
 """ % (dis_nested_0,
@@ -845,6 +867,8 @@ Disassembly of <code object foo at 0x..., file "%s", line %d>:
        1 if __debug__ else 0,
        __file__,
        _h.__code__.co_firstlineno + 3,
+       __file__,
+       _h.__code__.co_firstlineno + 3,
 )
 
 dis_nested_2 = """%s
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-10-12-09.gh-issue-142863.ZW2ZF8.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-17-10-12-09.gh-issue-142863.ZW2ZF8.rst
new file mode 100644 (file)
index 0000000..90c8f32
--- /dev/null
@@ -0,0 +1 @@
+Generate optimized bytecode when calling :class:`list` or :class:`set` with generator expression.
index c4109fcaa48dbe863faf74be9f7ba63296a18684..acd8d84bc9ec6d5b2063b8a92b4edc42401cfdac 100644 (file)
@@ -3892,6 +3892,12 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
     else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "tuple")) {
         const_oparg = CONSTANT_BUILTIN_TUPLE;
     }
+    else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "list")) {
+        const_oparg = CONSTANT_BUILTIN_LIST;
+    }
+    else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "set")) {
+        const_oparg = CONSTANT_BUILTIN_SET;
+    }
     if (const_oparg != -1) {
         ADDOP_I(c, loc, COPY, 1); // the function
         ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, const_oparg);
@@ -3899,8 +3905,10 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
         ADDOP_JUMP(c, loc, POP_JUMP_IF_FALSE, skip_optimization);
         ADDOP(c, loc, POP_TOP);
 
-        if (const_oparg == CONSTANT_BUILTIN_TUPLE) {
+        if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
             ADDOP_I(c, loc, BUILD_LIST, 0);
+        } else if (const_oparg == CONSTANT_BUILTIN_SET) {
+            ADDOP_I(c, loc, BUILD_SET, 0);
         }
         expr_ty generator_exp = asdl_seq_GET(args, 0);
         VISIT(c, expr, generator_exp);
@@ -3911,9 +3919,12 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
         ADDOP(c, loc, PUSH_NULL); // Push NULL index for loop
         USE_LABEL(c, loop);
         ADDOP_JUMP(c, loc, FOR_ITER, cleanup);
-        if (const_oparg == CONSTANT_BUILTIN_TUPLE) {
+        if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
             ADDOP_I(c, loc, LIST_APPEND, 3);
             ADDOP_JUMP(c, loc, JUMP, loop);
+        } else if (const_oparg == CONSTANT_BUILTIN_SET) {
+            ADDOP_I(c, loc, SET_ADD, 3);
+            ADDOP_JUMP(c, loc, JUMP, loop);
         }
         else {
             ADDOP(c, loc, TO_BOOL);
@@ -3921,7 +3932,9 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
         }
 
         ADDOP(c, NO_LOCATION, POP_ITER);
-        if (const_oparg != CONSTANT_BUILTIN_TUPLE) {
+        if (const_oparg != CONSTANT_BUILTIN_TUPLE &&
+            const_oparg != CONSTANT_BUILTIN_LIST &&
+            const_oparg != CONSTANT_BUILTIN_SET) {
             ADDOP_LOAD_CONST(c, loc, initial_res == Py_True ? Py_False : Py_True);
         }
         ADDOP_JUMP(c, loc, JUMP, end);
@@ -3931,6 +3944,10 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
         ADDOP(c, NO_LOCATION, POP_ITER);
         if (const_oparg == CONSTANT_BUILTIN_TUPLE) {
             ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_LIST_TO_TUPLE);
+        } else if (const_oparg == CONSTANT_BUILTIN_LIST) {
+            // result is already a list
+        } else if (const_oparg == CONSTANT_BUILTIN_SET) {
+            // result is already a set
         }
         else {
             ADDOP_LOAD_CONST(c, loc, initial_res);
index bb663db195c0890a0b34c613fb9e96b1842ba21e..16fb43ea1914397c2b2ba4d6f8a41abf7de9ca14 100644 (file)
@@ -832,6 +832,8 @@ pycore_init_builtins(PyThreadState *tstate)
     interp->common_consts[CONSTANT_BUILTIN_TUPLE] = (PyObject*)&PyTuple_Type;
     interp->common_consts[CONSTANT_BUILTIN_ALL] = all;
     interp->common_consts[CONSTANT_BUILTIN_ANY] = any;
+    interp->common_consts[CONSTANT_BUILTIN_LIST] = (PyObject*)&PyList_Type;
+    interp->common_consts[CONSTANT_BUILTIN_SET] = (PyObject*)&PySet_Type;
 
     for (int i=0; i < NUM_COMMON_CONSTANTS; i++) {
         assert(interp->common_consts[i] != NULL);