]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-114265: move line number propagation before cfg optimization, remove guarantee_lin...
authorIrit Katriel <1055913+iritkatriel@users.noreply.github.com>
Fri, 19 Jan 2024 14:49:26 +0000 (14:49 +0000)
committerGitHub <noreply@github.com>
Fri, 19 Jan 2024 14:49:26 +0000 (14:49 +0000)
Lib/importlib/_bootstrap_external.py
Lib/test/test_dis.py
Misc/NEWS.d/next/Core and Builtins/2024-01-19-13-18-13.gh-issue-114265.7HAi--.rst [new file with mode: 0644]
Python/flowgraph.c

index 97858ee83f790f7357d1f77c7041cc91f160625b..a4d2b7e018440926981be3169b02172c56b8d403 100644 (file)
@@ -463,6 +463,7 @@ _code_type = type(_write_atomic.__code__)
 #     Python 3.13a1 3564 (Removed oparg from YIELD_VALUE, changed oparg values of RESUME)
 #     Python 3.13a1 3565 (Oparg of YIELD_VALUE indicates whether it is in a yield-from)
 #     Python 3.13a1 3566 (Emit JUMP_NO_INTERRUPT instead of JUMP for non-loop no-lineno cases)
+#     Python 3.13a1 3567 (Reimplement line number propagation by the compiler)
 
 #     Python 3.14 will start with 3600
 
@@ -479,7 +480,7 @@ _code_type = type(_write_atomic.__code__)
 # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
 # in PC/launcher.c must also be updated.
 
-MAGIC_NUMBER = (3566).to_bytes(2, 'little') + b'\r\n'
+MAGIC_NUMBER = (3567).to_bytes(2, 'little') + b'\r\n'
 
 _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little')  # For import.c
 
index 097f05afdf15176a43eb9ab3425e9414855036d7..3ae81b2f5d62b06399b2379c853ba121a88c27ff 100644 (file)
@@ -577,14 +577,10 @@ dis_asyncwith = """\
                 RETURN_CONST             0 (None)
 
 %4d   L12:     CLEANUP_THROW
-
-  --   L13:     JUMP_BACKWARD_NO_INTERRUPT 25 (to L5)
-
-%4d   L14:     CLEANUP_THROW
-
-  --   L15:     JUMP_BACKWARD_NO_INTERRUPT 9 (to L11)
-
-%4d   L16:     PUSH_EXC_INFO
+       L13:     JUMP_BACKWARD_NO_INTERRUPT 25 (to L5)
+       L14:     CLEANUP_THROW
+       L15:     JUMP_BACKWARD_NO_INTERRUPT 9 (to L11)
+       L16:     PUSH_EXC_INFO
                 WITH_EXCEPT_START
                 GET_AWAITABLE            2
                 LOAD_CONST               0 (None)
@@ -630,8 +626,6 @@ ExceptionTable:
        _asyncwith.__code__.co_firstlineno + 1,
        _asyncwith.__code__.co_firstlineno + 3,
        _asyncwith.__code__.co_firstlineno + 1,
-       _asyncwith.__code__.co_firstlineno + 1,
-       _asyncwith.__code__.co_firstlineno + 1,
        _asyncwith.__code__.co_firstlineno + 3,
        )
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-19-13-18-13.gh-issue-114265.7HAi--.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-19-13-18-13.gh-issue-114265.7HAi--.rst
new file mode 100644 (file)
index 0000000..74affbb
--- /dev/null
@@ -0,0 +1 @@
+Compiler propagates line numbers before optimization, leading to more optimization opportunities and removing the need for the ``guarantee_lineno_for_exits`` hack.
index 4778f89e19b1438f612acf68b433c139f0c18aaf..e84030c87b1b4bb2088497583dd6b266e07c1bda 100644 (file)
@@ -29,6 +29,7 @@ typedef struct _PyCfgInstruction {
     int i_opcode;
     int i_oparg;
     _PyCompilerSrcLocation i_loc;
+    unsigned i_loc_propagated : 1;  /* location was set by propagate_line_numbers */
     struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */
     struct _PyCfgBasicblock *i_except; /* target block when exception is raised */
 } cfg_instr;
@@ -504,6 +505,21 @@ no_redundant_jumps(cfg_builder *g) {
     return true;
 }
 
+static bool
+all_exits_have_lineno(basicblock *entryblock) {
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        for (int i = 0; i < b->b_iused; i++) {
+            cfg_instr *instr = &b->b_instr[i];
+            if (instr->i_opcode == RETURN_VALUE) {
+                if (instr->i_loc.lineno < 0) {
+                    assert(0);
+                    return false;
+                }
+            }
+        }
+    }
+    return true;
+}
 #endif
 
 /***** CFG preprocessing (jump targets and exceptions) *****/
@@ -940,7 +956,10 @@ error:
 /***** CFG optimizations *****/
 
 static int
-mark_reachable(basicblock *entryblock) {
+remove_unreachable(basicblock *entryblock) {
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        b->b_predecessors = 0;
+    }
     basicblock **stack = make_cfg_traversal_stack(entryblock);
     if (stack == NULL) {
         return ERROR;
@@ -972,6 +991,14 @@ mark_reachable(basicblock *entryblock) {
         }
     }
     PyMem_Free(stack);
+
+    /* Delete unreachable instructions */
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+       if (b->b_predecessors == 0) {
+            b->b_iused = 0;
+            b->b_except_handler = 0;
+       }
+    }
     return SUCCESS;
 }
 
@@ -1149,13 +1176,15 @@ jump_thread(cfg_instr *inst, cfg_instr *target, int opcode)
     assert(is_jump(target));
     // bpo-45773: If inst->i_target == target->i_target, then nothing actually
     // changes (and we fall into an infinite loop):
+    if (inst->i_loc.lineno == -1) assert(inst->i_loc_propagated);
+    if (target->i_loc.lineno == -1) assert(target->i_loc_propagated);
     if ((inst->i_loc.lineno == target->i_loc.lineno ||
-         inst->i_loc.lineno == -1 || target->i_loc.lineno == -1) &&
+         inst->i_loc_propagated || target->i_loc_propagated) &&
         inst->i_target != target->i_target)
     {
         inst->i_target = target->i_target;
         inst->i_opcode = opcode;
-        if (inst->i_loc.lineno == -1) {
+        if (inst->i_loc_propagated && !target->i_loc_propagated) {
             inst->i_loc = target->i_loc;
         }
         return true;
@@ -1714,6 +1743,7 @@ error:
     return ERROR;
 }
 
+static int resolve_line_numbers(cfg_builder *g, int firstlineno);
 
 /* Perform optimizations on a control flow graph.
    The consts object should still be in list form to allow new constants
@@ -1723,41 +1753,31 @@ error:
    NOPs.  Later those NOPs are removed.
 */
 static int
-optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
+optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
 {
     assert(PyDict_CheckExact(const_cache));
     RETURN_IF_ERROR(check_cfg(g));
     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         RETURN_IF_ERROR(inline_small_exit_blocks(b));
     }
+    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
+    RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
-        assert(b->b_predecessors == 0);
     }
     RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
     for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         RETURN_IF_ERROR(inline_small_exit_blocks(b));
     }
-    RETURN_IF_ERROR(mark_reachable(g->g_entryblock));
-
-    /* Delete unreachable instructions */
-    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
-       if (b->b_predecessors == 0) {
-            b->b_iused = 0;
-            b->b_except_handler = 0;
-       }
-    }
-    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
-        remove_redundant_nops(b);
-    }
-    RETURN_IF_ERROR(remove_redundant_jumps(g));
+    RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
 
-    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
-        remove_redundant_nops(b);
+    for (int n = 0; n < 2; n++) {
+        for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+            remove_redundant_nops(b);
+        }
+        RETURN_IF_ERROR(remove_redundant_jumps(g));
     }
 
-    RETURN_IF_ERROR(remove_redundant_jumps(g));
-
     assert(no_redundant_jumps(g));
     return SUCCESS;
 }
@@ -2174,7 +2194,13 @@ push_cold_blocks_to_end(cfg_builder *g) {
             if (!IS_LABEL(b->b_next->b_label)) {
                 b->b_next->b_label.id = next_lbl++;
             }
-            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id, NO_LOCATION);
+            cfg_instr *prev_instr = basicblock_last_instr(b);
+            // b cannot be empty because at the end of an exception handler
+            // there is always a POP_EXCEPT + RERAISE/RETURN
+            assert(prev_instr);
+
+            basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id,
+                             prev_instr->i_loc);
             explicit_jump->b_cold = 1;
             explicit_jump->b_next = b->b_next;
             b->b_next = explicit_jump;
@@ -2345,6 +2371,7 @@ propagate_line_numbers(basicblock *entryblock) {
         for (int i = 0; i < b->b_iused; i++) {
             if (b->b_instr[i].i_loc.lineno < 0) {
                 b->b_instr[i].i_loc = prev_location;
+                b->b_instr[i].i_loc_propagated = 1;
             }
             else {
                 prev_location = b->b_instr[i].i_loc;
@@ -2354,6 +2381,7 @@ propagate_line_numbers(basicblock *entryblock) {
             if (b->b_next->b_iused > 0) {
                 if (b->b_next->b_instr[0].i_loc.lineno < 0) {
                     b->b_next->b_instr[0].i_loc = prev_location;
+                    b->b_next->b_instr[0].i_loc_propagated = 1;
                 }
             }
         }
@@ -2362,46 +2390,18 @@ propagate_line_numbers(basicblock *entryblock) {
             if (target->b_predecessors == 1) {
                 if (target->b_instr[0].i_loc.lineno < 0) {
                     target->b_instr[0].i_loc = prev_location;
+                    target->b_instr[0].i_loc_propagated = 1;
                 }
             }
         }
     }
 }
 
-/* Make sure that all returns have a line number, even if early passes
- * have failed to propagate a correct line number.
- * The resulting line number may not be correct according to PEP 626,
- * but should be "good enough", and no worse than in older versions. */
-static void
-guarantee_lineno_for_exits(basicblock *entryblock, int firstlineno) {
-    int lineno = firstlineno;
-    assert(lineno > 0);
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-        cfg_instr *last = basicblock_last_instr(b);
-        if (last == NULL) {
-            continue;
-        }
-        if (last->i_loc.lineno < 0) {
-            if (last->i_opcode == RETURN_VALUE) {
-                for (int i = 0; i < b->b_iused; i++) {
-                    assert(b->b_instr[i].i_loc.lineno < 0);
-
-                    b->b_instr[i].i_loc.lineno = lineno;
-                }
-            }
-        }
-        else {
-            lineno = last->i_loc.lineno;
-        }
-    }
-}
-
 static int
 resolve_line_numbers(cfg_builder *g, int firstlineno)
 {
     RETURN_IF_ERROR(duplicate_exits_without_lineno(g));
     propagate_line_numbers(g->g_entryblock);
-    guarantee_lineno_for_exits(g->g_entryblock, firstlineno);
     return SUCCESS;
 }
 
@@ -2417,7 +2417,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
     RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
 
     /** Optimization **/
-    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache));
+    RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
     RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
     RETURN_IF_ERROR(
         add_checks_for_loads_of_uninitialized_variables(
@@ -2425,6 +2425,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
     insert_superinstructions(g);
 
     RETURN_IF_ERROR(push_cold_blocks_to_end(g));
+    assert(all_exits_have_lineno(g->g_entryblock));
     RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
     return SUCCESS;
 }