git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Support underflow and yield value in the optimizer
author Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Sat, 25 Oct 2025 17:45:33 +0000 (18:45 +0100)
committer Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Sat, 25 Oct 2025 17:45:33 +0000 (18:45 +0100)
Include/internal/pycore_optimizer.h
Python/optimizer.c
Python/optimizer_analysis.c
Python/optimizer_bytecodes.c
Python/optimizer_cases.c.h
Python/optimizer_symbols.c

index ca46249bde98dfb1614c7f72c07eae71f6f2dddf..7c542e322ecbc2bcf74ad47b759403b91ba48ac5 100644 (file)
@@ -250,6 +250,7 @@ struct _Py_UOpsAbstractFrame {
     int stack_len;
     int locals_len;
     PyFunctionObject *func;
+    PyCodeObject *code;
 
     JitOptRef *stack_pointer;
     JitOptRef *stack;
@@ -325,7 +326,7 @@ extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
     int curr_stackentries,
     JitOptRef *args,
     int arg_len);
-extern int _Py_uop_frame_pop(JitOptContext *ctx);
+extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries);
 
 PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
 
index 9071265b508154b137424e483ca48ba2e1389a37..ef7e518faaed8978d8da5e83972d662c32668cbb 100644 (file)
@@ -872,6 +872,9 @@ _PyJit_translate_single_bytecode_to_trace(
                     else {
                         operand = 0;
                     }
+                    ADD_TO_TRACE(uop, oparg, operand, target);
+                    trace[trace_length - 1].operand1 = ((int)(frame->stackpointer - _PyFrame_Stackbase(frame)));
+                    break;
                 }
                 if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) {
                     assert(i + 1 == nuops);
index 2d3f1d95d5ab14f1f051abb8f7d80d1a011d7cfb..c576f3b716bc2ab57e09411985629531a46a5c3e 100644 (file)
@@ -267,7 +267,7 @@ static
 PyCodeObject *
 get_current_code_object(JitOptContext *ctx)
 {
-    return (PyCodeObject *)ctx->frame->func->func_code;
+    return (PyCodeObject *)ctx->frame->code;
 }
 
 static PyObject *
@@ -298,10 +298,6 @@ optimize_uops(
     JitOptContext context;
     JitOptContext *ctx = &context;
     uint32_t opcode = UINT16_MAX;
-    int curr_space = 0;
-    int max_space = 0;
-    _PyUOpInstruction *first_valid_check_stack = NULL;
-    _PyUOpInstruction *corresponding_check_stack = NULL;
 
     // Make sure that watchers are set up
     PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -368,14 +364,6 @@ optimize_uops(
     /* Either reached the end or cannot optimize further, but there
      * would be no benefit in retrying later */
     _Py_uop_abstractcontext_fini(ctx);
-    if (first_valid_check_stack != NULL) {
-        assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
-        assert(max_space > 0);
-        assert(max_space <= INT_MAX);
-        assert(max_space <= INT32_MAX);
-        first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
-        first_valid_check_stack->operand0 = max_space;
-    }
     return trace_len;
 
 error:
index 09c5ae764e5ac393f1c37bc0c7eef70ed0e1c4a0..7e4a9778a89efe8ac548722da7ea9cf327bfa141 100644 (file)
@@ -773,50 +773,55 @@ dummy_func(void) {
         JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
         DEAD(retval);
         SAVE_STACK();
-        PyCodeObject *co = get_current_code_object(ctx);
         ctx->frame->stack_pointer = stack_pointer;
-        if (frame_pop(ctx)) {
+        PyCodeObject *returning_code = get_code_with_logging(this_instr);
+        if (returning_code == NULL) {
+            ctx->done = true;
+            break;
+        }
+        int returning_stacklevel = this_instr->operand1;
+        if (frame_pop(ctx, returning_code, returning_stacklevel)) {
             break;
         }
         stack_pointer = ctx->frame->stack_pointer;
 
-        /* Stack space handling */
-        assert(corresponding_check_stack == NULL);
-        assert(co != NULL);
-        int framesize = co->co_framesize;
-        assert(framesize > 0);
-        assert(framesize <= curr_space);
-        curr_space -= framesize;
-
         RELOAD_STACK();
         res = temp;
     }
 
     op(_RETURN_GENERATOR, ( -- res)) {
         SYNC_SP();
-        PyCodeObject *co = get_current_code_object(ctx);
         ctx->frame->stack_pointer = stack_pointer;
-        frame_pop(ctx);
+        PyCodeObject *returning_code = get_code_with_logging(this_instr);
+        if (returning_code == NULL) {
+            ctx->done = true;
+            break;
+        }
+        int returning_stacklevel = this_instr->operand1;
+        if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+            break;
+        }
         stack_pointer = ctx->frame->stack_pointer;
         res = sym_new_unknown(ctx);
-        /* Stack space handling */
-        assert(corresponding_check_stack == NULL);
-        assert(co != NULL);
-        int framesize = co->co_framesize;
-        assert(framesize > 0);
-        assert(framesize <= curr_space);
-        curr_space -= framesize;
-    }
-
-    op(_YIELD_VALUE, (unused -- value)) {
-        // TODO (gh-139109): handle this properly in a future optimization.
-        // A possibility to handle underflows is to just restore the current frame information
-        // from whatever is stored in the trace we record at that point of time.
-        // E.g. we record at this YIELD_VALUE, func_obj=x , stack_level=4
-        // We can restore it to there.
-        value = sym_new_unknown(ctx);
-        ctx->done = true;
-        ctx->out_of_space = true;
+    }
+
+    op(_YIELD_VALUE, (retval -- value)) {
+        // Mimics PyStackRef_MakeHeapSafe in the interpreter.
+        JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
+        DEAD(retval);
+        SAVE_STACK();
+        PyCodeObject *returning_code = get_code_with_logging(this_instr);
+        if (returning_code == NULL) {
+            ctx->done = true;
+            break;
+        }
+        int returning_stacklevel = this_instr->operand1;
+        if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+            break;
+        }
+        stack_pointer = ctx->frame->stack_pointer;
+        RELOAD_STACK();
+        value = temp;
     }
 
     op(_GET_ITER, (iterable -- iter, index_or_null)) {
@@ -843,8 +848,6 @@ dummy_func(void) {
     }
 
     op(_CHECK_STACK_SPACE, (unused, unused, unused[oparg] -- unused, unused, unused[oparg])) {
-        assert(corresponding_check_stack == NULL);
-        corresponding_check_stack = this_instr;
     }
 
     op (_CHECK_STACK_SPACE_OPERAND, (framesize/2 -- )) {
@@ -870,24 +873,6 @@ dummy_func(void) {
         PyCodeObject *co = (PyCodeObject *)func->func_code;
         assert(PyFunction_Check(func));
         ctx->frame->func = func;
-        /* Stack space handling */
-        int framesize = co->co_framesize;
-        assert(framesize > 0);
-        curr_space += framesize;
-        if (curr_space < 0 || curr_space > INT32_MAX) {
-            // won't fit in signed 32-bit int
-            ctx->done = true;
-            break;
-        }
-        max_space = curr_space > max_space ? curr_space : max_space;
-        if (first_valid_check_stack == NULL) {
-            first_valid_check_stack = corresponding_check_stack;
-        }
-        else if (corresponding_check_stack) {
-            // delete all but the first valid _CHECK_STACK_SPACE
-            corresponding_check_stack->opcode = _NOP;
-        }
-        corresponding_check_stack = NULL;
     }
 
     op(_UNPACK_SEQUENCE, (seq -- values[oparg], top[0])) {
index 002da75ea38481005192e58f8cc1d743d5e6d9d9..5192280e5ca20f5270b8c5ff40e67e022fca9ac7 100644 (file)
             JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
             stack_pointer += -1;
             assert(WITHIN_STACK_BOUNDS());
-            PyCodeObject *co = get_current_code_object(ctx);
             ctx->frame->stack_pointer = stack_pointer;
-            if (frame_pop(ctx)) {
+            PyCodeObject *returning_code = get_code_with_logging(this_instr);
+            if (returning_code == NULL) {
+                ctx->done = true;
+                break;
+            }
+            int returning_stacklevel = this_instr->operand1;
+            if (frame_pop(ctx, returning_code, returning_stacklevel)) {
                 break;
             }
             stack_pointer = ctx->frame->stack_pointer;
-            assert(corresponding_check_stack == NULL);
-            assert(co != NULL);
-            int framesize = co->co_framesize;
-            assert(framesize > 0);
-            assert(framesize <= curr_space);
-            curr_space -= framesize;
             res = temp;
             stack_pointer[0] = res;
             stack_pointer += 1;
         }
 
         case _YIELD_VALUE: {
+            JitOptRef retval;
             JitOptRef value;
-            value = sym_new_unknown(ctx);
-            ctx->done = true;
-            ctx->out_of_space = true;
-            stack_pointer[-1] = value;
+            retval = stack_pointer[-1];
+            JitOptRef temp = PyJitRef_StripReferenceInfo(retval);
+            stack_pointer += -1;
+            assert(WITHIN_STACK_BOUNDS());
+            PyCodeObject *returning_code = get_code_with_logging(this_instr);
+            if (returning_code == NULL) {
+                ctx->done = true;
+                break;
+            }
+            int returning_stacklevel = this_instr->operand1;
+            if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+                break;
+            }
+            stack_pointer = ctx->frame->stack_pointer;
+            value = temp;
+            stack_pointer[0] = value;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
         }
 
         case _CHECK_STACK_SPACE: {
-            assert(corresponding_check_stack == NULL);
-            corresponding_check_stack = this_instr;
             break;
         }
 
             PyCodeObject *co = (PyCodeObject *)func->func_code;
             assert(PyFunction_Check(func));
             ctx->frame->func = func;
-            int framesize = co->co_framesize;
-            assert(framesize > 0);
-            curr_space += framesize;
-            if (curr_space < 0 || curr_space > INT32_MAX) {
-                ctx->done = true;
-                break;
-            }
-            max_space = curr_space > max_space ? curr_space : max_space;
-            if (first_valid_check_stack == NULL) {
-                first_valid_check_stack = corresponding_check_stack;
-            }
-            else if (corresponding_check_stack) {
-                corresponding_check_stack->opcode = _NOP;
-            }
-            corresponding_check_stack = NULL;
             break;
         }
 
 
         case _RETURN_GENERATOR: {
             JitOptRef res;
-            PyCodeObject *co = get_current_code_object(ctx);
             ctx->frame->stack_pointer = stack_pointer;
-            frame_pop(ctx);
+            PyCodeObject *returning_code = get_code_with_logging(this_instr);
+            if (returning_code == NULL) {
+                ctx->done = true;
+                break;
+            }
+            int returning_stacklevel = this_instr->operand1;
+            if (frame_pop(ctx, returning_code, returning_stacklevel)) {
+                break;
+            }
             stack_pointer = ctx->frame->stack_pointer;
             res = sym_new_unknown(ctx);
-            assert(corresponding_check_stack == NULL);
-            assert(co != NULL);
-            int framesize = co->co_framesize;
-            assert(framesize > 0);
-            assert(framesize <= curr_space);
-            curr_space -= framesize;
             stack_pointer[0] = res;
             stack_pointer += 1;
             assert(WITHIN_STACK_BOUNDS());
index b0997f56b98b944f0587dc8e80fc6b47a06edb80..b9747b7fd8410f591154629cc32641c6d234d06d 100644 (file)
@@ -824,6 +824,7 @@ _Py_uop_frame_new(
     }
     _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
 
+    frame->code = co;
     frame->stack_len = co->co_stacksize;
     frame->locals_len = co->co_nlocalsplus;
 
@@ -905,18 +906,41 @@ _Py_uop_abstractcontext_init(JitOptContext *ctx)
 }
 
 int
-_Py_uop_frame_pop(JitOptContext *ctx)
+_Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries)
 {
     _Py_UOpsAbstractFrame *frame = ctx->frame;
     ctx->n_consumed = frame->locals;
+
     ctx->curr_frame_depth--;
-    // TODO gh-139109: Handle trace recording underflow
-    if (ctx->curr_frame_depth == 0) {
-        ctx->done = true;
-        ctx->out_of_space = true;
+
+    if (ctx->curr_frame_depth >= 1) {
+        ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
+
+        // We returned to the correct code. Nothing to do here.
+        if (co == ctx->frame->code) {
+            return 0;
+        }
+        // Else: the code we recorded doesn't match the code we *think* we're
+        // returning to. We could trace anything, we can't just return to the
+        // old frame. We have to restore what the tracer recorded
+        // as the traced next frame.
+        // Remove the current frame, and later swap it out with the right one.
+        else {
+            ctx->curr_frame_depth--;
+        }
+    }
+    // Else: trace stack underflow.
+
+    // This handles swapping out frames.
+    assert(curr_stackentries >= 1);
+    // -1 to stackentries as we push to the stack our return value after this.
+    _Py_UOpsAbstractFrame *new_frame = _Py_uop_frame_new(ctx, co, curr_stackentries - 1, NULL, 0);
+    if (new_frame == NULL) {
         return 1;
     }
-    ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1];
+
+    ctx->curr_frame_depth++;
+    ctx->frame = new_frame;
 
     return 0;
 }