From 4a4a31f0962734df3555200b0a9846c1f6356fbe Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 25 Oct 2025 18:45:33 +0100 Subject: [PATCH] Support underflow and yield value in the optimizer --- Include/internal/pycore_optimizer.h | 3 +- Python/optimizer.c | 3 + Python/optimizer_analysis.c | 14 +---- Python/optimizer_bytecodes.c | 85 ++++++++++++----------------- Python/optimizer_cases.c.h | 72 ++++++++++++------------ Python/optimizer_symbols.c | 36 ++++++++++-- 6 files changed, 106 insertions(+), 107 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index ca46249bde98..7c542e322ecb 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -250,6 +250,7 @@ struct _Py_UOpsAbstractFrame { int stack_len; int locals_len; PyFunctionObject *func; + PyCodeObject *code; JitOptRef *stack_pointer; JitOptRef *stack; @@ -325,7 +326,7 @@ extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( int curr_stackentries, JitOptRef *args, int arg_len); -extern int _Py_uop_frame_pop(JitOptContext *ctx); +extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries); PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); diff --git a/Python/optimizer.c b/Python/optimizer.c index 9071265b5081..ef7e518faaed 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -872,6 +872,9 @@ _PyJit_translate_single_bytecode_to_trace( else { operand = 0; } + ADD_TO_TRACE(uop, oparg, operand, target); + trace[trace_length - 1].operand1 = ((int)(frame->stackpointer - _PyFrame_Stackbase(frame))); + break; } if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) { assert(i + 1 == nuops); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2d3f1d95d5ab..c576f3b716bc 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -267,7 +267,7 @@ static PyCodeObject * get_current_code_object(JitOptContext *ctx) { - return (PyCodeObject *)ctx->frame->func->func_code; + return (PyCodeObject *)ctx->frame->code; } static PyObject * @@ -298,10 +298,6 @@ optimize_uops( JitOptContext context; JitOptContext *ctx = &context; uint32_t opcode = UINT16_MAX; - int curr_space = 0; - int max_space = 0; - _PyUOpInstruction *first_valid_check_stack = NULL; - _PyUOpInstruction *corresponding_check_stack = NULL; // Make sure that watchers are set up PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -368,14 +364,6 @@ optimize_uops( /* Either reached the end or cannot optimize further, but there * would be no benefit in retrying later */ _Py_uop_abstractcontext_fini(ctx); - if (first_valid_check_stack != NULL) { - assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE); - assert(max_space > 0); - assert(max_space <= INT_MAX); - assert(max_space <= INT32_MAX); - first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND; - first_valid_check_stack->operand0 = max_space; - } return trace_len; error: diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 09c5ae764e5a..7e4a9778a89e 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -773,50 +773,55 @@ dummy_func(void) { JitOptRef temp = PyJitRef_StripReferenceInfo(retval); DEAD(retval); SAVE_STACK(); - PyCodeObject *co = get_current_code_object(ctx); ctx->frame->stack_pointer = stack_pointer; - if (frame_pop(ctx)) { + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = 
true; + break; + } + int returning_stacklevel = this_instr->operand1; + if (frame_pop(ctx, returning_code, returning_stacklevel)) { break; } stack_pointer = ctx->frame->stack_pointer; - /* Stack space handling */ - assert(corresponding_check_stack == NULL); - assert(co != NULL); - int framesize = co->co_framesize; - assert(framesize > 0); - assert(framesize <= curr_space); - curr_space -= framesize; - RELOAD_STACK(); res = temp; } op(_RETURN_GENERATOR, ( -- res)) { SYNC_SP(); - PyCodeObject *co = get_current_code_object(ctx); ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = true; + break; + } + int returning_stacklevel = this_instr->operand1; + if (frame_pop(ctx, returning_code, returning_stacklevel)) { + break; + } stack_pointer = ctx->frame->stack_pointer; res = sym_new_unknown(ctx); - /* Stack space handling */ - assert(corresponding_check_stack == NULL); - assert(co != NULL); - int framesize = co->co_framesize; - assert(framesize > 0); - assert(framesize <= curr_space); - curr_space -= framesize; - } - - op(_YIELD_VALUE, (unused -- value)) { - // TODO (gh-139109): handle this properly in a future optimization. - // A possibility to handle underflows is to just restore the current frame information - // from whatever is stored in the trace we record at that point of time. - // E.g. we record at this YIELD_VALUE, func_obj=x , stack_level=4 - // We can restore it to there. - value = sym_new_unknown(ctx); - ctx->done = true; - ctx->out_of_space = true; + } + + op(_YIELD_VALUE, (retval -- value)) { + // Mimics PyStackRef_MakeHeapSafe in the interpreter. + JitOptRef temp = PyJitRef_StripReferenceInfo(retval); + DEAD(retval); + SAVE_STACK(); + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = true; + break; + } + int returning_stacklevel = this_instr->operand1; + if (frame_pop(ctx, returning_code, returning_stacklevel)) { + break; + } + stack_pointer = ctx->frame->stack_pointer; + RELOAD_STACK(); + value = temp; } op(_GET_ITER, (iterable -- iter, index_or_null)) { @@ -843,8 +848,6 @@ dummy_func(void) { } op(_CHECK_STACK_SPACE, (unused, unused, unused[oparg] -- unused, unused, unused[oparg])) { - assert(corresponding_check_stack == NULL); - corresponding_check_stack = this_instr; } op (_CHECK_STACK_SPACE_OPERAND, (framesize/2 -- )) { @@ -870,24 +873,6 @@ dummy_func(void) { PyCodeObject *co = (PyCodeObject *)func->func_code; assert(PyFunction_Check(func)); ctx->frame->func = func; - /* Stack space handling */ - int framesize = co->co_framesize; - assert(framesize > 0); - curr_space += framesize; - if (curr_space < 0 || curr_space > INT32_MAX) { - // won't fit in signed 32-bit int - ctx->done = true; - break; - } - max_space = curr_space > max_space ? 
curr_space : max_space; - if (first_valid_check_stack == NULL) { - first_valid_check_stack = corresponding_check_stack; - } - else if (corresponding_check_stack) { - // delete all but the first valid _CHECK_STACK_SPACE - corresponding_check_stack->opcode = _NOP; - } - corresponding_check_stack = NULL; } op(_UNPACK_SEQUENCE, (seq -- values[oparg], top[0])) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 002da75ea384..5192280e5ca2 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1116,18 +1116,17 @@ JitOptRef temp = PyJitRef_StripReferenceInfo(retval); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - PyCodeObject *co = get_current_code_object(ctx); ctx->frame->stack_pointer = stack_pointer; - if (frame_pop(ctx)) { + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = true; + break; + } + int returning_stacklevel = this_instr->operand1; + if (frame_pop(ctx, returning_code, returning_stacklevel)) { break; } stack_pointer = ctx->frame->stack_pointer; - assert(corresponding_check_stack == NULL); - assert(co != NULL); - int framesize = co->co_framesize; - assert(framesize > 0); - assert(framesize <= curr_space); - curr_space -= framesize; res = temp; stack_pointer[0] = res; stack_pointer += 1; @@ -1169,11 +1168,26 @@ } case _YIELD_VALUE: { + JitOptRef retval; JitOptRef value; - value = sym_new_unknown(ctx); - ctx->done = true; - ctx->out_of_space = true; - stack_pointer[-1] = value; + retval = stack_pointer[-1]; + JitOptRef temp = PyJitRef_StripReferenceInfo(retval); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = true; + break; + } + int returning_stacklevel = this_instr->operand1; + if (frame_pop(ctx, returning_code, returning_stacklevel)) { + break; + } + stack_pointer = ctx->frame->stack_pointer; + value = temp; + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -2563,8 +2577,6 @@ } case _CHECK_STACK_SPACE: { - assert(corresponding_check_stack == NULL); - corresponding_check_stack = this_instr; break; } @@ -2620,21 +2632,6 @@ PyCodeObject *co = (PyCodeObject *)func->func_code; assert(PyFunction_Check(func)); ctx->frame->func = func; - int framesize = co->co_framesize; - assert(framesize > 0); - curr_space += framesize; - if (curr_space < 0 || curr_space > INT32_MAX) { - ctx->done = true; - break; - } - max_space = curr_space > max_space ? 
curr_space : max_space; - if (first_valid_check_stack == NULL) { - first_valid_check_stack = corresponding_check_stack; - } - else if (corresponding_check_stack) { - corresponding_check_stack->opcode = _NOP; - } - corresponding_check_stack = NULL; break; } @@ -3011,17 +3008,18 @@ case _RETURN_GENERATOR: { JitOptRef res; - PyCodeObject *co = get_current_code_object(ctx); ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); + PyCodeObject *returning_code = get_code_with_logging(this_instr); + if (returning_code == NULL) { + ctx->done = true; + break; + } + int returning_stacklevel = this_instr->operand1; + if (frame_pop(ctx, returning_code, returning_stacklevel)) { + break; + } stack_pointer = ctx->frame->stack_pointer; res = sym_new_unknown(ctx); - assert(corresponding_check_stack == NULL); - assert(co != NULL); - int framesize = co->co_framesize; - assert(framesize > 0); - assert(framesize <= curr_space); - curr_space -= framesize; stack_pointer[0] = res; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index b0997f56b98b..b9747b7fd841 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -824,6 +824,7 @@ _Py_uop_frame_new( } _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth]; + frame->code = co; frame->stack_len = co->co_stacksize; frame->locals_len = co->co_nlocalsplus; @@ -905,18 +906,41 @@ _Py_uop_abstractcontext_init(JitOptContext *ctx) } int -_Py_uop_frame_pop(JitOptContext *ctx) +_Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries) { _Py_UOpsAbstractFrame *frame = ctx->frame; ctx->n_consumed = frame->locals; + ctx->curr_frame_depth--; - // TODO gh-139109: Handle trace recording underflow - if (ctx->curr_frame_depth == 0) { - ctx->done = true; - ctx->out_of_space = true; + + if (ctx->curr_frame_depth >= 1) { + ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1]; + + // We returned to the correct code. Nothing to do here. + if (co == ctx->frame->code) { + return 0; + } + // Else: the code we recorded doesn't match the code we *think* we're + // returning to. We could trace anything, we can't just return to the + // old frame. We have to restore what the tracer recorded + // as the traced next frame. + // Remove the current frame, and later swap it out with the right one. + else { + ctx->curr_frame_depth--; + } + } + // Else: trace stack underflow. + + // This handles swapping out frames. + assert(curr_stackentries >= 1); + // -1 to stackentries as we push to the stack our return value after this. + _Py_UOpsAbstractFrame *new_frame = _Py_uop_frame_new(ctx, co, curr_stackentries - 1, NULL, 0); + if (new_frame == NULL) { return 1; } - ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1]; + + ctx->curr_frame_depth++; + ctx->frame = new_frame; return 0; } -- 2.47.3
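
Reviewer note (appended after the patch, not part of it): below is a minimal standalone sketch of the frame-pop strategy this change introduces, not CPython code. It assumes, as the optimizer.c hunk suggests, that the tracer records the caller's stack depth in operand1 of the RETURN/YIELD uop, and that get_code_with_logging() (not part of this diff) recovers the caller's code object recorded on the same instruction. All names here (Frame, Ctx, frame_new, frame_pop, "caller_code", ...) are hypothetical stand-ins for _Py_UOpsAbstractFrame, JitOptContext, _Py_uop_frame_new and _Py_uop_frame_pop.

#include <assert.h>
#include <stdio.h>

#define MAX_DEPTH 16

/* Stand-ins for _Py_UOpsAbstractFrame / JitOptContext. */
typedef struct {
    const char *code;   /* identity stand-in for PyCodeObject*  */
    int stacklevel;     /* abstract operand-stack depth         */
} Frame;

typedef struct {
    Frame frames[MAX_DEPTH];
    int depth;          /* number of frames currently pushed    */
} Ctx;

/* Push a frame rebuilt from a recorded code object and stack level
   (plays the role of _Py_uop_frame_new). */
static Frame *
frame_new(Ctx *ctx, const char *code, int stacklevel)
{
    if (ctx->depth >= MAX_DEPTH) {
        return NULL;
    }
    Frame *f = &ctx->frames[ctx->depth++];
    f->code = code;
    f->stacklevel = stacklevel;
    return f;
}

/* Pop the current frame.  recorded_code / recorded_stacklevel are what the
   tracer stored on the RETURN/YIELD instruction: the caller's code object
   and the caller's stack depth, counting the slot the return value will
   occupy.  Returns 0 on success, 1 on failure. */
static int
frame_pop(Ctx *ctx, const char *recorded_code, int recorded_stacklevel)
{
    ctx->depth--;                          /* discard the returning frame    */
    if (ctx->depth >= 1) {
        Frame *parent = &ctx->frames[ctx->depth - 1];
        if (parent->code == recorded_code) {
            return 0;                      /* parent matches: reuse it       */
        }
        ctx->depth--;                      /* mismatch: drop the stale frame */
    }
    /* Underflow or mismatch: rebuild the caller the tracer actually saw.
       -1 because the symbolic return value is pushed right after this. */
    assert(recorded_stacklevel >= 1);
    return frame_new(ctx, recorded_code, recorded_stacklevel - 1) ? 0 : 1;
}

int
main(void)
{
    Ctx ctx = {0};
    /* A trace that starts inside a generator only knows the generator's
       frame; yielding back to the caller underflows the abstract stack. */
    frame_new(&ctx, "generator_code", 0);
    int err = frame_pop(&ctx, "caller_code", 3);
    if (err == 0) {
        printf("depth=%d code=%s stacklevel=%d\n",
               ctx.depth,
               ctx.frames[ctx.depth - 1].code,
               ctx.frames[ctx.depth - 1].stacklevel);
        /* Expected output: depth=1 code=caller_code stacklevel=2 */
    }
    return 0;
}

The trade-off, as I read the diff: by stashing the caller's identity and stack depth in the trace at translation time, the abstract interpreter no longer has to bail out (ctx->done / ctx->out_of_space) on trace-stack underflow or on _YIELD_VALUE; it can always materialize a frame consistent with what the tracer observed, at the cost of that reconstructed frame starting with no symbolic information about the caller's stack.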