From 36554a514c9be979479b040e9c6270781b69ce59 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 21 Sep 2025 12:15:07 +0100 Subject: [PATCH] Fix over-tracing bug --- Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 47 ++++++++++++++++---------- Python/ceval_macros.h | 5 +-- Python/executor_cases.c.h | 46 ++++++++++++++++--------- Python/generated_cases.c.h | 12 ++++--- Python/optimizer.c | 6 ++-- 6 files changed, 74 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index f5d7e1fafe80..a7eebe9ad0d5 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -339,7 +339,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_ERROR_POP_N] = HAS_ARG_FLAG, [_TIER2_RESUME_CHECK] = HAS_PERIODIC_FLAG, [_COLD_EXIT] = HAS_ESCAPES_FLAG, - [_GUARD_IP] = HAS_ESCAPES_FLAG, + [_GUARD_IP] = HAS_EXIT_FLAG, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 98ff74b0d6af..61e97cb99230 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2961,7 +2961,9 @@ dummy_func( tier1 op(_JIT, (--)) { #ifdef _Py_TIER2 _Py_BackoffCounter counter = this_instr[1].counter; - if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD_JIT) { + if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) && + this_instr->op.code == JUMP_BACKWARD_JIT && + next_instr->op.code != ENTER_EXECUTOR) { _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ int curr_oparg = oparg; @@ -3053,7 +3055,7 @@ dummy_func( next_instr = this_instr; DISPATCH_GOTO(); } - TIER1_TO_TIER2(executor, 1); + TIER1_TO_TIER2(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); #endif /* _Py_TIER2 */ @@ -5458,24 +5460,12 @@ dummy_func( } tier2 op(_GUARD_IP, (ip/4 --)) { - if (frame->instr_ptr != (_Py_CODEUNIT *)ip) { -#ifdef Py_DEBUG - _Py_CODEUNIT *target = frame->instr_ptr; - if (frame->lltrace >= 2) { - printf("GUARD IP EXIT: [UOp "); - _PyUOpPrint(&next_uop[-1]); - printf(", target %d -> %s]\n", - (int)(target - _PyFrame_GetBytecode(frame)), - _PyOpcode_OpName[target->op.code]); - } -#endif - GOTO_TIER_ONE(frame->instr_ptr, 1); - } + EXIT_IF(frame->instr_ptr != (_Py_CODEUNIT *)ip); } - tier2 op(_DYNAMIC_EXIT, (ip/4 --)) { -#ifdef Py_DEBUG + tier2 op(_DYNAMIC_EXIT, (exit_p/4 --)) { _Py_CODEUNIT *target = frame->instr_ptr; +#ifdef Py_DEBUG if (frame->lltrace >= 2) { printf("GUARD IP EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); @@ -5484,7 +5474,28 @@ dummy_func( _PyOpcode_OpName[target->op.code]); } #endif - GOTO_TIER_ONE(frame->instr_ptr, 1); + _PyExitData *exit = (_PyExitData *)exit_p; + tstate->jit_exit = exit_p; + _Py_BackoffCounter temperature = exit->temperature; + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = _PyFrame_GetCode(frame); + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + GOTO_TIER_ONE(frame->instr_ptr, 0); + } + _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); + assert(tstate->current_executor == (PyObject *)previous_executor); + int chain_depth = previous_executor->vm_data.chain_depth + 1; + _PyJIT_InitializeTracing(tstate, frame, target, STACK_LEVEL(), chain_depth); + GOTO_TIER_ONE(target, 1); + } + exit->executor = executor; + TIER2_TO_TIER2(exit->executor); } label(pop_2_error) { diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index a13c94278044..0e7e5e81a7f9 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -402,7 +402,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer) /* Tier-switching macros. */ -#define TIER1_TO_TIER2(EXECUTOR, IN_ENTER_EXECUTOR) \ +#define TIER1_TO_TIER2(EXECUTOR) \ do { \ OPT_STAT_INC(traces_executed); \ next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \ @@ -414,7 +414,8 @@ do { \ next_instr = frame->instr_ptr; \ JUMP_TO_LABEL(error); \ } \ - if (!IN_ENTER_EXECUTOR && keep_tracing_bit) { \ + if (keep_tracing_bit) { \ + assert(next_instr->op.code != ENTER_EXECUTOR); \ ENTER_TRACING(); \ _PyJIT_InitializeTracing(tstate, frame, next_instr, STACK_LEVEL(), 0); \ } \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 013d7e783616..62cda4cf98fb 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7551,27 +7551,16 @@ case _GUARD_IP: { PyObject *ip = (PyObject *)CURRENT_OPERAND0(); if (frame->instr_ptr != (_Py_CODEUNIT *)ip) { - #ifdef Py_DEBUG - _Py_CODEUNIT *target = frame->instr_ptr; - if (frame->lltrace >= 2) { - _PyFrame_SetStackPointer(frame, stack_pointer); - printf("GUARD IP EXIT: [UOp "); - _PyUOpPrint(&next_uop[-1]); - printf(", target %d -> %s]\n", - (int)(target - _PyFrame_GetBytecode(frame)), - _PyOpcode_OpName[target->op.code]); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - #endif - GOTO_TIER_ONE(frame->instr_ptr, 1); + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); } break; } case _DYNAMIC_EXIT: { - PyObject *ip = (PyObject *)CURRENT_OPERAND0(); - #ifdef Py_DEBUG + PyObject *exit_p = (PyObject *)CURRENT_OPERAND0(); _Py_CODEUNIT *target = frame->instr_ptr; + #ifdef Py_DEBUG if (frame->lltrace >= 2) { _PyFrame_SetStackPointer(frame, stack_pointer); printf("GUARD IP EXIT: [UOp "); @@ -7582,7 +7571,32 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } #endif - GOTO_TIER_ONE(frame->instr_ptr, 1); + _PyExitData *exit = (_PyExitData *)exit_p; + tstate->jit_exit = exit_p; + _Py_BackoffCounter temperature = exit->temperature; + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = _PyFrame_GetCode(frame); + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + GOTO_TIER_ONE(frame->instr_ptr, 0); + } + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); + stack_pointer = _PyFrame_GetStackPointer(frame); + assert(tstate->current_executor == (PyObject *)previous_executor); + int chain_depth = previous_executor->vm_data.chain_depth + 1; + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyJIT_InitializeTracing(tstate, frame, target, STACK_LEVEL(), chain_depth); + stack_pointer = _PyFrame_GetStackPointer(frame); + GOTO_TIER_ONE(target, 1); + } + exit->executor = executor; + TIER2_TO_TIER2(exit->executor); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 9551acb4d64b..8201d2c236c1 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5818,7 +5818,7 @@ next_instr = this_instr; DISPATCH_GOTO(); } - TIER1_TO_TIER2(executor, 1); + TIER1_TO_TIER2(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); #endif /* _Py_TIER2 */ @@ -8147,7 +8147,9 @@ { #ifdef _Py_TIER2 _Py_BackoffCounter counter = this_instr[1].counter; - if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD_JIT) { + if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) && + this_instr->op.code == JUMP_BACKWARD_JIT && + next_instr->op.code != ENTER_EXECUTOR) { _Py_CODEUNIT *start = this_instr; int curr_oparg = oparg; while (curr_oparg > 255) { @@ -18960,7 +18962,7 @@ next_instr = this_instr; DISPATCH_GOTO(); } - TIER1_TO_TIER2(executor, 1); + TIER1_TO_TIER2(executor); #else Py_FatalError("ENTER_EXECUTOR is not supported in this build"); #endif /* _Py_TIER2 */ @@ -21363,7 +21365,9 @@ { #ifdef _Py_TIER2 _Py_BackoffCounter counter = this_instr[1].counter; - if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD_JIT) { + if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) && + this_instr->op.code == JUMP_BACKWARD_JIT && + next_instr->op.code != ENTER_EXECUTOR) { _Py_CODEUNIT *start = this_instr; int curr_oparg = oparg; while (curr_oparg > 255) { diff --git a/Python/optimizer.c b/Python/optimizer.c index b973fdd7ac41..f59beda5f8a4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -842,7 +842,7 @@ count_exits(_PyUOpInstruction *buffer, int length) int exit_count = 0; for (int i = 0; i < length; i++) { int opcode = buffer[i].opcode; - if (opcode == _EXIT_TRACE) { + if (opcode == _EXIT_TRACE || opcode == _DYNAMIC_EXIT) { exit_count++; } } @@ -898,7 +898,7 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length) else if (exit_flags & HAS_PERIODIC_FLAG) { exit_op = _HANDLE_PENDING_AND_DEOPT; } - if (opcode == _FOR_ITER_TIER_TWO) { + if (opcode == _FOR_ITER_TIER_TWO || opcode == _GUARD_IP) { exit_op = _DYNAMIC_EXIT; } int32_t jump_target = target; @@ -1053,7 +1053,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil int opcode = buffer[i].opcode; dest--; *dest = buffer[i]; - if (opcode == _EXIT_TRACE) { + if (opcode == _EXIT_TRACE || opcode == _DYNAMIC_EXIT) { _PyExitData *exit = &executor->exits[next_exit]; exit->target = buffer[i].target; dest->operand0 = (uint64_t)exit; -- 2.47.3