GH-113853: Guarantee forward progress in executors (GH-113854)
author Mark Shannon <mark@hotpy.org>
Thu, 11 Jan 2024 18:20:42 +0000 (18:20 +0000)
committer GitHub <noreply@github.com>
Thu, 11 Jan 2024 18:20:42 +0000 (18:20 +0000)
Include/cpython/optimizer.h
Misc/NEWS.d/next/Core and Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst [new file with mode: 0644]
Python/bytecodes.c
Python/generated_cases.c.h
Python/optimizer.c

diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h
index f077da7ee88456279a3784ac90fd261a77ab8b11..0e7bc9fdd7a07d1a05c7db33a9fc60163b7763ef 100644 (file)
@@ -65,7 +65,7 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
 PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);
 
 int
-_PyOptimizer_BackEdge(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer);
+_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);
 
 extern _PyOptimizerObject _PyOptimizer_Default;
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-08-05-36-59.gh-issue-113853.lm-6_a.rst
new file mode 100644 (file)
index 0000000..d4f0a76
--- /dev/null
@@ -0,0 +1,2 @@
+Guarantee that all executors make progress. This then guarantees that tier 2
+execution always makes progress.
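As a side note, the bytecodes.c change below backs up over EXTENDED_ARG prefixes before asking the optimizer for an executor. Here is a minimal standalone sketch of that decoding, assuming a simplified code-unit layout; codeunit_t and instruction_start() are illustrative stand-ins for _Py_CODEUNIT and the inline loop in the patch, not CPython API:

#include <stdint.h>

/* Simplified stand-in for _Py_CODEUNIT: one opcode byte plus one oparg byte. */
typedef struct { uint8_t code; uint8_t arg; } codeunit_t;

/* Given the last code unit of an instruction (here the JUMP_BACKWARD) and its
 * fully combined oparg, step back one code unit per extra oparg byte so that
 * the optimizer is handed the first EXTENDED_ARG of the instruction. */
static codeunit_t *
instruction_start(codeunit_t *this_instr, uint32_t oparg)
{
    codeunit_t *start = this_instr;
    while (oparg > 255) {
        oparg >>= 8;
        start--;
    }
    return start;
}

The interpreter applies exactly this back-up before calling _PyOptimizer_Optimize(), so the ENTER_EXECUTOR that later replaces the instruction lands on its first code unit.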
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index f53ddae8df985ac877804574afddbecd24aa5e8d..2011d963e3687c5379aa828461075c6b94d4ca7f 100644 (file)
@@ -2325,12 +2325,18 @@ dummy_func(
             // Double-check that the opcode isn't instrumented or something:
             if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
-                int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
+                _Py_CODEUNIT *start = this_instr;
+                /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
+                while (oparg > 255) {
+                    oparg >>= 8;
+                    start--;
+                }
+                int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
                 ERROR_IF(optimized < 0, error);
                 if (optimized) {
                     // Rewind and enter the executor:
-                    assert(this_instr->op.code == ENTER_EXECUTOR);
-                    next_instr = this_instr;
+                    assert(start->op.code == ENTER_EXECUTOR);
+                    next_instr = start;
                     this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
                 }
                 else {
@@ -2370,10 +2376,12 @@ dummy_func(
                 GOTO_TIER_TWO();
             }
             else {
-                code->co_executors->executors[oparg & 255] = NULL;
+                /* ENTER_EXECUTOR will be the first code unit of the instruction */
+                assert(oparg < 256);
+                code->co_executors->executors[oparg] = NULL;
                 opcode = this_instr->op.code = executor->vm_data.opcode;
                 this_instr->op.arg = executor->vm_data.oparg;
-                oparg = (oparg & (~255)) | executor->vm_data.oparg;
+                oparg = executor->vm_data.oparg;
                 Py_DECREF(executor);
                 next_instr = this_instr;
                 DISPATCH_GOTO();
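A hedged illustration of why the oparg & 255 masking could be dropped above: ENTER_EXECUTOR now always overwrites the first code unit of an instruction, so its one-byte arg is the executor-table index itself, and the displaced opcode/oparg saved in vm_data can be written straight back on invalidation. The types and the restore_displaced_instruction() helper below are hypothetical simplifications, not CPython's real structures:

#include <stdint.h>

typedef struct { uint8_t code; uint8_t arg; } codeunit_t;        /* stand-in for _Py_CODEUNIT */
typedef struct { uint8_t opcode; uint8_t oparg; } saved_instr_t; /* stand-in for executor->vm_data */

/* On invalidation, put back whatever ENTER_EXECUTOR displaced.  Because the
 * executor sits on the first code unit of the instruction, its 8-bit arg is
 * the bare table index: clearing co_executors->executors[index] needs no
 * masking, and oparg < 256 holds by construction. */
static void
restore_displaced_instruction(codeunit_t *enter_executor, const saved_instr_t *saved)
{
    enter_executor->code = saved->opcode;   /* e.g. EXTENDED_ARG or JUMP_BACKWARD */
    enter_executor->arg = saved->oparg;
}

The same reasoning is behind the assert(oparg < 256) added to ENTER_EXECUTOR above.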
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index e693e3e2560e7bada99ff7e1388a82d271a5dcae..1e995b62a72fcfac0196dc94e378d028ef36bcad 100644 (file)
                 GOTO_TIER_TWO();
             }
             else {
-                code->co_executors->executors[oparg & 255] = NULL;
+                /* ENTER_EXECUTOR will be the first code unit of the instruction */
+                assert(oparg < 256);
+                code->co_executors->executors[oparg] = NULL;
                 opcode = this_instr->op.code = executor->vm_data.opcode;
                 this_instr->op.arg = executor->vm_data.oparg;
-                oparg = (oparg & (~255)) | executor->vm_data.oparg;
+                oparg = executor->vm_data.oparg;
                 Py_DECREF(executor);
                 next_instr = this_instr;
                 DISPATCH_GOTO();
             // Double-check that the opcode isn't instrumented or something:
             if (ucounter > threshold && this_instr->op.code == JUMP_BACKWARD) {
                 OPT_STAT_INC(attempts);
-                int optimized = _PyOptimizer_BackEdge(frame, this_instr, next_instr, stack_pointer);
+                _Py_CODEUNIT *start = this_instr;
+                /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */
+                while (oparg > 255) {
+                    oparg >>= 8;
+                    start--;
+                }
+                int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
                 if (optimized < 0) goto error;
                 if (optimized) {
                     // Rewind and enter the executor:
-                    assert(this_instr->op.code == ENTER_EXECUTOR);
-                    next_instr = this_instr;
+                    assert(start->op.code == ENTER_EXECUTOR);
+                    next_instr = start;
                     this_instr[1].cache &= ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1);
                 }
                 else {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 28e12dbbf5d78b2d3fbc4ca21472cc793af90d60..227d6be009241922d911aa87d2ee9dba2f8d9a83 100644 (file)
@@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
 }
 
 int
-_PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer)
+_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
 {
-    assert(src->op.code == JUMP_BACKWARD);
     PyCodeObject *code = (PyCodeObject *)frame->f_executable;
     assert(PyCode_Check(code));
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (!has_space_for_executor(code, src)) {
+    if (!has_space_for_executor(code, start)) {
         return 0;
     }
     _PyOptimizerObject *opt = interp->optimizer;
     _PyExecutorObject *executor = NULL;
-    /* Start optimizing at the destination to guarantee forward progress */
-    int err = opt->optimize(opt, code, dest, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
+    int err = opt->optimize(opt, code, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
     if (err <= 0) {
         assert(executor == NULL);
         return err;
     }
-    int index = get_index_for_executor(code, src);
+    int index = get_index_for_executor(code, start);
     if (index < 0) {
         /* Out of memory. Don't raise and assume that the
          * error will show up elsewhere.
@@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
         Py_DECREF(executor);
         return 0;
     }
-    insert_executor(code, src, index, executor);
+    insert_executor(code, start, index, executor);
     Py_DECREF(executor);
     return 1;
 }
@@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
 #define CONFIDENCE_RANGE 1000
 #define CONFIDENCE_CUTOFF 333
 
-/* Returns 1 on success,
- * 0 if it failed to produce a worthwhile trace,
- * and -1 on an error.
- */
-static int
-translate_bytecode_to_trace(
-    PyCodeObject *code,
-    _Py_CODEUNIT *instr,
-    _PyUOpInstruction *trace,
-    int buffer_size,
-    _PyBloomFilter *dependencies)
-{
-    PyCodeObject *initial_code = code;
-    _Py_BloomFilter_Add(dependencies, initial_code);
-    _Py_CODEUNIT *initial_instr = instr;
-    int trace_length = 0;
-    int max_length = buffer_size;
-    struct {
-        PyCodeObject *code;
-        _Py_CODEUNIT *instr;
-    } trace_stack[TRACE_STACK_SIZE];
-    int trace_stack_depth = 0;
-    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
-
-#ifdef Py_DEBUG
-    char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
-    int lltrace = 0;
-    if (python_lltrace != NULL && *python_lltrace >= '0') {
-        lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
-    }
-#endif
-
 #ifdef Py_DEBUG
 #define DPRINTF(level, ...) \
     if (lltrace >= (level)) { printf(__VA_ARGS__); }
@@ -403,6 +369,39 @@ translate_bytecode_to_trace(
     code = trace_stack[trace_stack_depth].code; \
     instr = trace_stack[trace_stack_depth].instr;
 
+/* Returns 1 on success,
+ * 0 if it failed to produce a worthwhile trace,
+ * and -1 on an error.
+ */
+static int
+translate_bytecode_to_trace(
+    PyCodeObject *code,
+    _Py_CODEUNIT *instr,
+    _PyUOpInstruction *trace,
+    int buffer_size,
+    _PyBloomFilter *dependencies)
+{
+    bool progress_needed = true;
+    PyCodeObject *initial_code = code;
+    _Py_BloomFilter_Add(dependencies, initial_code);
+    _Py_CODEUNIT *initial_instr = instr;
+    int trace_length = 0;
+    int max_length = buffer_size;
+    struct {
+        PyCodeObject *code;
+        _Py_CODEUNIT *instr;
+    } trace_stack[TRACE_STACK_SIZE];
+    int trace_stack_depth = 0;
+    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
+
+#ifdef Py_DEBUG
+    char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
+    int lltrace = 0;
+    if (python_lltrace != NULL && *python_lltrace >= '0') {
+        lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
+    }
+#endif
+
     DPRINTF(4,
             "Optimizing %s (%s:%d) at byte offset %d\n",
             PyUnicode_AsUTF8(code->co_qualname),
@@ -410,6 +409,7 @@ translate_bytecode_to_trace(
             code->co_firstlineno,
             2 * INSTR_IP(initial_instr, code));
     uint32_t target = 0;
+
 top:  // Jump here after _PUSH_FRAME or likely branches
     for (;;) {
         target = INSTR_IP(instr, code);
@@ -421,6 +421,15 @@ top:  // Jump here after _PUSH_FRAME or likely branches
         uint32_t oparg = instr->op.arg;
         uint32_t extended = 0;
 
+        if (opcode == ENTER_EXECUTOR) {
+            assert(oparg < 256);
+            _PyExecutorObject *executor =
+                (_PyExecutorObject *)code->co_executors->executors[oparg];
+            opcode = executor->vm_data.opcode;
+            DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  _PyOpcode_OpName[opcode]);
+            oparg = executor->vm_data.oparg;
+        }
+
         if (opcode == EXTENDED_ARG) {
             instr++;
             extended = 1;
@@ -431,13 +440,23 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                 goto done;
             }
         }
-
-        if (opcode == ENTER_EXECUTOR) {
-            _PyExecutorObject *executor =
-                (_PyExecutorObject *)code->co_executors->executors[oparg&255];
-            opcode = executor->vm_data.opcode;
-            DPRINTF(2, "  * ENTER_EXECUTOR -> %s\n",  _PyOpcode_OpName[opcode]);
-            oparg = (oparg & 0xffffff00) | executor->vm_data.oparg;
+        assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
+
+        /* Special case the first instruction,
+         * so that we can guarantee forward progress */
+        if (progress_needed) {
+            progress_needed = false;
+            if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
+                instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
+                initial_instr = instr;
+                continue;
+            }
+            else {
+                if (OPCODE_HAS_DEOPT(opcode)) {
+                    opcode = _PyOpcode_Deopt[opcode];
+                }
+                assert(!OPCODE_HAS_DEOPT(opcode));
+            }
         }
 
         switch (opcode) {
@@ -480,7 +499,9 @@ top:  // Jump here after _PUSH_FRAME or likely branches
             case JUMP_BACKWARD:
             case JUMP_BACKWARD_NO_INTERRUPT:
             {
-                if (instr + 2 - oparg == initial_instr && code == initial_code) {
+                _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
+                if (target == initial_instr) {
+                    /* We have looped round to the start */
                     RESERVE(1);
                     ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
                 }
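For the target arithmetic in the two hunks above, a small self-contained sketch of how the backward-jump destination is computed and compared against the trace's starting instruction; the types and helper names are illustrative only, not taken from the patch:

#include <stdint.h>

typedef struct { uint8_t code; uint8_t arg; } codeunit_t;

/* Backward jumps are measured from the code unit that follows the opcode and
 * its inline caches, counting oparg code units back towards the loop head.
 * `caches` plays the role of _PyOpcode_Caches[JUMP_BACKWARD]. */
static codeunit_t *
backward_jump_target(codeunit_t *jump_instr, int caches, uint32_t oparg)
{
    return jump_instr + 1 + caches - (int32_t)oparg;
}

/* A jump whose target is the exact code unit the trace started from has
 * looped round to the top, so the translator can emit _JUMP_TO_TOP instead
 * of an exit. */
static int
loops_to_start(codeunit_t *jump_instr, int caches, uint32_t oparg,
               codeunit_t *initial_instr)
{
    return backward_jump_target(jump_instr, caches, oparg) == initial_instr;
}

This is the same expression, instr + 1 + _PyOpcode_Caches[opcode] - oparg, used both to follow the first backward jump (the forward-progress special case) and to detect that a later jump has looped back to initial_instr.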
@@ -641,19 +662,7 @@ done:
     }
     assert(code == initial_code);
     // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
-    if (trace_length > 4) {
-        ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
-        DPRINTF(1,
-                "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
-                PyUnicode_AsUTF8(code->co_qualname),
-                PyUnicode_AsUTF8(code->co_filename),
-                code->co_firstlineno,
-                2 * INSTR_IP(initial_instr, code),
-                trace_length);
-        OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
-        return 1;
-    }
-    else {
+    if (progress_needed || trace_length < 5) {
         OPT_STAT_INC(trace_too_short);
         DPRINTF(4,
                 "No trace for %s (%s:%d) at byte offset %d\n",
@@ -661,15 +670,25 @@ done:
                 PyUnicode_AsUTF8(code->co_filename),
                 code->co_firstlineno,
                 2 * INSTR_IP(initial_instr, code));
+        return 0;
     }
-    return 0;
+    ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+    DPRINTF(1,
+            "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n",
+            PyUnicode_AsUTF8(code->co_qualname),
+            PyUnicode_AsUTF8(code->co_filename),
+            code->co_firstlineno,
+            2 * INSTR_IP(initial_instr, code),
+            trace_length);
+    OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist);
+    return 1;
+}
 
 #undef RESERVE
 #undef RESERVE_RAW
 #undef INSTR_IP
 #undef ADD_TO_TRACE
 #undef DPRINTF
-}
 
 #define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31)))
 #define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31)))
@@ -854,10 +873,20 @@ counter_optimize(
     int Py_UNUSED(curr_stackentries)
 )
 {
+    int oparg = instr->op.arg;
+    while (instr->op.code == EXTENDED_ARG) {
+        instr++;
+        oparg = (oparg << 8) | instr->op.arg;
+    }
+    if (instr->op.code != JUMP_BACKWARD) {
+        /* Counter optimizer can only handle backward edges */
+        return 0;
+    }
+    _Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
     _PyUOpInstruction buffer[3] = {
         { .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self },
         { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
-        { .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) }
+        { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
     };
     _PyBloomFilter empty;
     _Py_BloomFilter_Init(&empty);
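The counter optimizer's new prologue mirrors the back-up loop in JUMP_BACKWARD, but walks forwards from the code unit it is given, rebuilding the combined oparg and bailing out on anything that is not a backward edge. A sketch under the same simplified types, with placeholder opcode values rather than CPython's real ones:

#include <stdint.h>

typedef struct { uint8_t code; uint8_t arg; } codeunit_t;

/* Placeholder opcode values for this sketch only; the real numbers come from
 * CPython's generated opcode headers. */
enum { SKETCH_EXTENDED_ARG = 1, SKETCH_JUMP_BACKWARD = 2 };

/* Walk forwards over any EXTENDED_ARG prefixes, accumulating the combined
 * oparg, and report whether the instruction at the end of the prefix chain
 * is a backward edge (the only shape the counter optimizer attaches to). */
static int
decode_backward_edge(const codeunit_t **pinstr, uint32_t *poparg)
{
    const codeunit_t *instr = *pinstr;
    uint32_t oparg = instr->arg;
    while (instr->code == SKETCH_EXTENDED_ARG) {
        instr++;
        oparg = (oparg << 8) | instr->arg;
    }
    *pinstr = instr;
    *poparg = oparg;
    return instr->code == SKETCH_JUMP_BACKWARD;
}

counter_optimize() then points its _EXIT_TRACE uop at the jump's destination rather than at the JUMP_BACKWARD itself, so even the counter stub makes forward progress.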