]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-146455: Fix O(N²) in add_const() after constant folding moved to CFG (#146456)
authorzSirius <107359899+zSirius@users.noreply.github.com>
Sun, 26 Apr 2026 12:15:24 +0000 (20:15 +0800)
committerGitHub <noreply@github.com>
Sun, 26 Apr 2026 12:15:24 +0000 (15:15 +0300)
The add_const() function in flowgraph.c uses a linear search over the
consts list to find the index of a constant. After gh-126835 moved
constant folding from the AST optimizer to the CFG optimizer, this
function is now called N times for N inner tuple elements during
fold_tuple_of_constants(), resulting in O(N²) total time.

Fix by maintaining an auxiliary _Py_hashtable_t that maps object
pointers to their indices in the consts list, providing O(1) lookup.

For a file with 100,000 constant 2-tuples:
- Before: 10.38s (add_const occupies 83.76% of CPU time)
- After:  1.48s

Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst [new file with mode: 0644]
Python/flowgraph.c

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst
new file mode 100644 (file)
index 0000000..4d7537f
--- /dev/null
@@ -0,0 +1 @@
+Fix O(N²) compile-time regression in constant folding after it was moved from AST to CFG optimizer.
index 202e3bacf2e1bfe5cb5413f4583d577dbe85c66b..2cb2d32a4106131f6e30ba4cca1c99f3e2dc8899 100644 (file)
@@ -6,6 +6,7 @@
 #include "pycore_intrinsics.h"
 #include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
 #include "pycore_long.h"          // _PY_IS_SMALL_INT()
+#include "pycore_hashtable.h"     // _Py_hashtable_t
 
 #include "pycore_opcode_utils.h"
 #include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
@@ -1333,30 +1334,38 @@ get_const_value(int opcode, int oparg, PyObject *co_consts)
 
 // Steals a reference to newconst.
 static int
-add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
+add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
+          _Py_hashtable_t *consts_index)
 {
     if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
         Py_DECREF(newconst);
         return -1;
     }
 
-    Py_ssize_t index;
-    for (index = 0; index < PyList_GET_SIZE(consts); index++) {
-        if (PyList_GET_ITEM(consts, index) == newconst) {
-            break;
-        }
+    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
+    if (entry != NULL) {
+        Py_DECREF(newconst);
+        return (int)(uintptr_t)entry->value;
     }
-    if (index == PyList_GET_SIZE(consts)) {
-        if ((size_t)index >= (size_t)INT_MAX - 1) {
-            PyErr_SetString(PyExc_OverflowError, "too many constants");
-            Py_DECREF(newconst);
-            return -1;
-        }
-        if (PyList_Append(consts, newconst)) {
-            Py_DECREF(newconst);
-            return -1;
-        }
+
+    Py_ssize_t index = PyList_GET_SIZE(consts);
+    if ((size_t)index >= (size_t)INT_MAX - 1) {
+        PyErr_SetString(PyExc_OverflowError, "too many constants");
+        Py_DECREF(newconst);
+        return -1;
+    }
+    if (PyList_Append(consts, newconst)) {
+        Py_DECREF(newconst);
+        return -1;
+    }
+
+    if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
+        PyList_SetSlice(consts, index, index + 1, NULL);
+        Py_DECREF(newconst);
+        PyErr_NoMemory();
+        return -1;
     }
+
     Py_DECREF(newconst);
     return (int)index;
 }
@@ -1432,7 +1441,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
 /* Steals reference to "newconst" */
 static int
 instr_make_load_const(cfg_instr *instr, PyObject *newconst,
-                      PyObject *consts, PyObject *const_cache)
+                      PyObject *consts, PyObject *const_cache,
+                      _Py_hashtable_t *consts_index)
 {
     int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
     if (res < 0) {
@@ -1442,7 +1452,7 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
     if (res > 0) {
         return SUCCESS;
     }
-    int oparg = add_const(newconst, consts, const_cache);
+    int oparg = add_const(newconst, consts, const_cache, consts_index);
     RETURN_IF_ERROR(oparg);
     INSTR_SET_OP1(instr, LOAD_CONST, oparg);
     return SUCCESS;
@@ -1455,7 +1465,8 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
    Called with codestr pointing to the first LOAD_CONST.
 */
 static int
-fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
+fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
+                        PyObject *const_cache, _Py_hashtable_t *consts_index)
 {
     /* Pre-conditions */
     assert(PyDict_CheckExact(const_cache));
@@ -1492,7 +1503,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
     }
 
     nop_out(const_instrs, seq_size);
-    return instr_make_load_const(instr, const_tuple, consts, const_cache);
+    return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
 }
 
 /* Replace:
@@ -1510,7 +1521,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
 */
 static int
 fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
-                                      PyObject *consts, PyObject *const_cache)
+                                      PyObject *consts, PyObject *const_cache,
+                                      _Py_hashtable_t *consts_index)
 {
     assert(PyDict_CheckExact(const_cache));
     assert(PyList_CheckExact(consts));
@@ -1562,7 +1574,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
                 nop_out(&instr, 1);
             }
             assert(consts_found == 0);
-            return instr_make_load_const(intrinsic, newconst, consts, const_cache);
+            return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
         }
 
         if (expect_append) {
@@ -1598,7 +1610,8 @@ Optimize lists and sets for:
 */
 static int
 optimize_lists_and_sets(basicblock *bb, int i, int nextop,
-                        PyObject *consts, PyObject *const_cache)
+                        PyObject *consts, PyObject *const_cache,
+                        _Py_hashtable_t *consts_index)
 {
     assert(PyDict_CheckExact(const_cache));
     assert(PyList_CheckExact(consts));
@@ -1648,7 +1661,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,
         Py_SETREF(const_result, frozenset);
     }
 
-    int index = add_const(const_result, consts, const_cache);
+    int index = add_const(const_result, consts, const_cache, consts_index);
     RETURN_IF_ERROR(index);
     nop_out(const_instrs, seq_size);
 
@@ -1845,7 +1858,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right)
 }
 
 static int
-fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
+fold_const_binop(basicblock *bb, int i, PyObject *consts,
+                 PyObject *const_cache, _Py_hashtable_t *consts_index)
 {
     #define BINOP_OPERAND_COUNT 2
     assert(PyDict_CheckExact(const_cache));
@@ -1887,7 +1901,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
     }
 
     nop_out(operands_instrs, BINOP_OPERAND_COUNT);
-    return instr_make_load_const(binop, newconst, consts, const_cache);
+    return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
 }
 
 static PyObject *
@@ -1933,7 +1947,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg)
 }
 
 static int
-fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
+fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
+                   PyObject *const_cache, _Py_hashtable_t *consts_index)
 {
     #define UNARYOP_OPERAND_COUNT 1
     assert(PyDict_CheckExact(const_cache));
@@ -1970,7 +1985,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cach
         assert(PyBool_Check(newconst));
     }
     nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
-    return instr_make_load_const(unaryop, newconst, consts, const_cache);
+    return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
 }
 
 #define VISITED (-1)
@@ -2165,7 +2180,8 @@ apply_static_swaps(basicblock *block, int i)
 }
 
 static int
-basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
+basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
+                               PyObject *consts, _Py_hashtable_t *consts_index)
 {
     assert(PyDict_CheckExact(const_cache));
     assert(PyList_CheckExact(consts));
@@ -2283,7 +2299,7 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
                     return ERROR;
                 }
                 cnt = PyBool_FromLong(is_true);
-                int index = add_const(cnt, consts, const_cache);
+                int index = add_const(cnt, consts, const_cache, consts_index);
                 if (index < 0) {
                     return ERROR;
                 }
@@ -2297,15 +2313,17 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
 }
 
 static int
-optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
+optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
+                    _Py_hashtable_t *consts_index) {
     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
-        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
+        RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
     }
     return SUCCESS;
 }
 
 static int
-optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
+optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
+                     _Py_hashtable_t *consts_index)
 {
     assert(PyDict_CheckExact(const_cache));
     assert(PyList_CheckExact(consts));
@@ -2345,11 +2363,11 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
                             continue;
                     }
                 }
-                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
+                RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
                 break;
             case BUILD_LIST:
             case BUILD_SET:
-                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
+                RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
                 break;
             case POP_JUMP_IF_NOT_NONE:
             case POP_JUMP_IF_NONE:
@@ -2484,7 +2502,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
                 _Py_FALLTHROUGH;
             case UNARY_INVERT:
             case UNARY_NEGATIVE:
-                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
+                RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
                 break;
             case CALL_INTRINSIC_1:
                 if (oparg == INTRINSIC_LIST_TO_TUPLE) {
@@ -2492,15 +2510,15 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
                         INSTR_SET_OP0(inst, NOP);
                     }
                     else {
-                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
+                        RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
                     }
                 }
                 else if (oparg == INTRINSIC_UNARY_POSITIVE) {
-                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
+                    RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
                 }
                 break;
             case BINARY_OP:
-                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
+                RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
                 break;
         }
     }
@@ -2545,16 +2563,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g)
    NOPs.  Later those NOPs are removed.
 */
 static int
-optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
+optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
+             _Py_hashtable_t *consts_index, int firstlineno)
 {
     assert(PyDict_CheckExact(const_cache));
     RETURN_IF_ERROR(check_cfg(g));
     RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
     RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
     RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
-    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
+    RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
-        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
+        RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
     }
     RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
     RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
@@ -3674,7 +3693,33 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
     RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
 
     /** Optimization **/
-    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
+
+    _Py_hashtable_t *consts_index = _Py_hashtable_new(
+        _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
+    if (consts_index == NULL) {
+        PyErr_NoMemory();
+        return ERROR;
+    }
+
+    for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
+        PyObject *item = PyList_GET_ITEM(consts, i);
+        if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
+            continue;
+        }
+        if (_Py_hashtable_set(consts_index, (void *)item,
+                              (void *)(uintptr_t)i) < 0) {
+            _Py_hashtable_destroy(consts_index);
+            PyErr_NoMemory();
+            return ERROR;
+        }
+    }
+
+    int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
+
+    _Py_hashtable_destroy(consts_index);
+
+    RETURN_IF_ERROR(ret);
+
     RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
     RETURN_IF_ERROR(
         add_checks_for_loads_of_uninitialized_variables(