]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-118095: Unify the behavior of tier 2 FOR_ITER branch micro-ops (GH-118420)
authorMark Shannon <mark@hotpy.org>
Thu, 2 May 2024 15:17:59 +0000 (16:17 +0100)
committerGitHub <noreply@github.com>
Thu, 2 May 2024 15:17:59 +0000 (16:17 +0100)
* Target _FOR_ITER_TIER_TWO at POP_TOP following the matching END_FOR

* Modify _GUARD_NOT_EXHAUSTED_RANGE, _GUARD_NOT_EXHAUSTED_LIST and _GUARD_NOT_EXHAUSTED_TUPLE so that they also target the POP_TOP following the matching END_FOR

Python/bytecodes.c
Python/executor_cases.c.h
Python/optimizer.c
Python/optimizer_symbols.c
Python/specialize.c

index b1be40dd38ce2133d9385251938d9c8039de60f4..9769cfe68aaeba2ccdecf061e27e10910c84646a 100644 (file)
@@ -2610,9 +2610,7 @@ dummy_func(
                     _PyErr_Clear(tstate);
                 }
                 /* iterator ended normally */
-                Py_DECREF(iter);
-                STACK_SHRINK(1);
-                /* The translator sets the deopt target just past END_FOR */
+                /* The translator sets the deopt target just past the matching END_FOR */
                 DEOPT_IF(true);
             }
             // Common case: no jump, leave it to the code generator
index f97f279836eb6606c714611e3a7deae013d522c3..03db9b623cbd869823c0cbf445c6b90645ddafcc 100644 (file)
                     _PyErr_Clear(tstate);
                 }
                 /* iterator ended normally */
-                Py_DECREF(iter);
-                STACK_SHRINK(1);
-                /* The translator sets the deopt target just past END_FOR */
+                /* The translator sets the deopt target just past the matching END_FOR */
                 if (true) {
                     UOP_STAT_INC(uopcode, miss);
                     JUMP_TO_JUMP_TARGET();
index 050947396d1724cd05acce7c60bc3ba02d99fe52..56768ae8f542f611ddd83e8d41e0192eb8d72ea6 100644 (file)
 
 #define MAX_EXECUTORS_SIZE 256
 
+#ifdef Py_DEBUG
+static int
+base_opcode(PyCodeObject *code, int offset)
+{
+    int opcode = _Py_GetBaseOpcode(code, offset);
+    if (opcode == ENTER_EXECUTOR) {
+        int oparg = _PyCode_CODE(code)[offset].op.arg;
+        _PyExecutorObject *ex = code->co_executors->executors[oparg];
+        return ex->vm_data.opcode;
+    }
+    return opcode;
+}
+#endif
 
 static bool
 has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
@@ -445,6 +458,14 @@ _PyUOp_Replacements[MAX_UOP_ID + 1] = {
     [_FOR_ITER] = _FOR_ITER_TIER_TWO,
 };
 
+static const uint8_t
+is_for_iter_test[MAX_UOP_ID + 1] = {
+    [_GUARD_NOT_EXHAUSTED_RANGE] = 1,
+    [_GUARD_NOT_EXHAUSTED_LIST] = 1,
+    [_GUARD_NOT_EXHAUSTED_TUPLE] = 1,
+    [_FOR_ITER_TIER_TWO] = 1,
+};
+
 static const uint16_t
 BRANCH_TO_GUARD[4][2] = {
     [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP,
@@ -594,7 +615,6 @@ top:  // Jump here after _PUSH_FRAME or likely branches
 
         uint32_t opcode = instr->op.code;
         uint32_t oparg = instr->op.arg;
-        uint32_t extended = 0;
 
         DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);
 
@@ -608,7 +628,6 @@ top:  // Jump here after _PUSH_FRAME or likely branches
 
         if (opcode == EXTENDED_ARG) {
             instr++;
-            extended = 1;
             opcode = instr->op.code;
             oparg = (oparg << 8) | instr->op.arg;
             if (opcode == EXTENDED_ARG) {
@@ -772,12 +791,15 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                             case OPARG_REPLACED:
                                 uop = _PyUOp_Replacements[uop];
                                 assert(uop != 0);
-                                if (uop == _FOR_ITER_TIER_TWO) {
-                                    target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 2 + extended;
-                                    assert(_PyCode_CODE(code)[target-2].op.code == END_FOR ||
-                                            _PyCode_CODE(code)[target-2].op.code == INSTRUMENTED_END_FOR);
-                                    assert(_PyCode_CODE(code)[target-1].op.code == POP_TOP);
+#ifdef Py_DEBUG
+                                {
+                                    uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 255);
+                                    uint32_t jump_target = next_inst + oparg;
+                                    assert(base_opcode(code, jump_target) == END_FOR ||
+                                        base_opcode(code, jump_target) == INSTRUMENTED_END_FOR);
+                                    assert(base_opcode(code, jump_target+1) == POP_TOP);
                                 }
+#endif
                                 break;
                             default:
                                 fprintf(stderr,
@@ -1000,10 +1022,18 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
         if (_PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) {
             uint16_t exit_op = (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) ?
                 _SIDE_EXIT : _DEOPT;
-            if (target != current_jump_target || current_exit_op != exit_op) {
-                make_exit(&buffer[next_spare], exit_op, target);
+            int32_t jump_target = target;
+            if (is_for_iter_test[opcode]) {
+                /* Target the POP_TOP immediately after the END_FOR,
+                 * leaving only the iterator on the stack. */
+                int extended_arg = inst->oparg > 255;
+                int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg;
+                jump_target = next_inst + inst->oparg + 1;
+            }
+            if (jump_target != current_jump_target || current_exit_op != exit_op) {
+                make_exit(&buffer[next_spare], exit_op, jump_target);
                 current_exit_op = exit_op;
-                current_jump_target = target;
+                current_jump_target = jump_target;
                 current_jump = next_spare;
                 next_spare++;
             }
index d52f490853c006a3edebebbd7823bd15bbe9eca9..4aeb04fe0405d228c10eb243da3988df6288a702 100644 (file)
@@ -164,19 +164,26 @@ _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val)
     return true;
 }
 
-
 bool
 _Py_uop_sym_set_null(_Py_UopsSymbol *sym)
 {
+    if (_Py_uop_sym_is_not_null(sym)) {
+        sym_set_bottom(sym);
+        return false;
+    }
     sym_set_flag(sym, IS_NULL);
-    return !_Py_uop_sym_is_bottom(sym);
+    return true;
 }
 
 bool
 _Py_uop_sym_set_non_null(_Py_UopsSymbol *sym)
 {
+    if (_Py_uop_sym_is_null(sym)) {
+        sym_set_bottom(sym);
+        return false;
+    }
     sym_set_flag(sym, NOT_NULL);
-    return !_Py_uop_sym_is_bottom(sym);
+    return true;
 }
 
 
index ee51781372166a8265bcb386b2138e326187c247..72114f27f69c52a4066e0c76a9c813116114990e 100644 (file)
@@ -215,6 +215,7 @@ print_gc_stats(FILE *out, GCStats *stats)
     }
 }
 
+#ifdef _Py_TIER2
 static void
 print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
 {
@@ -249,7 +250,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
             stats->optimizer_failure_reason_no_memory);
     fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed);
     fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys);
-
     for (int i = 0; i <= MAX_UOP_ID; i++) {
         if (stats->opcode[i].execution_count) {
             fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
@@ -258,7 +258,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
             fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
         }
     }
-
     for (int i = 0; i < 256; i++) {
         if (stats->unsupported_opcode[i]) {
             fprintf(
@@ -289,6 +288,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
         }
     }
 }
+#endif
 
 static void
 print_rare_event_stats(FILE *out, RareEventStats *stats)
@@ -309,7 +309,9 @@ print_stats(FILE *out, PyStats *stats)
     print_call_stats(out, &stats->call_stats);
     print_object_stats(out, &stats->object_stats);
     print_gc_stats(out, stats->gc_stats);
+#ifdef _Py_TIER2
     print_optimization_stats(out, &stats->optimization_stats);
+#endif
     print_rare_event_stats(out, &stats->rare_event_stats);
 }