From 1ef4a37922bf4ef4b6e5ec2349539a626fdf7f22 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Wed, 5 Nov 2025 21:11:53 +0000 Subject: [PATCH] Make sure we don't reenter executors when guard exec ip fails --- Include/internal/pycore_ceval.h | 3 ++- Python/bytecodes.c | 31 +++++++++++++++++++++++++------ Python/ceval_macros.h | 11 +++-------- Python/executor_cases.c.h | 26 +++++++++++++++++++++----- Python/generated_cases.c.h | 6 +++++- Python/optimizer.c | 8 +++++++- Tools/cases_generator/analyzer.py | 2 ++ 7 files changed, 65 insertions(+), 22 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 33b9fd053f70..02da61952256 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -334,9 +334,10 @@ _PyEval_SpecialMethodCanSuggest(PyObject *self, int oparg); #define _PY_EVAL_PLEASE_STOP_BIT (1U << 5) #define _PY_EVAL_EXPLICIT_MERGE_BIT (1U << 6) #define _PY_EVAL_JIT_INVALIDATE_COLD_BIT (1U << 7) +#define _PY_EVAL_JIT_DO_NOT_REENTER (1U << 8) /* Reserve a few bits for future use */ -#define _PY_EVAL_EVENTS_BITS 8 +#define _PY_EVAL_EVENTS_BITS 9 #define _PY_EVAL_EVENTS_MASK ((1 << _PY_EVAL_EVENTS_BITS)-1) static inline void diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0ecfd0ff993c..ddd55e71fc19 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -44,7 +44,6 @@ #define USE_COMPUTED_GOTOS 0 #include "ceval_macros.h" -#include "../Include/internal/pycore_stackref.h" /* Flow control macros */ @@ -2988,6 +2987,9 @@ dummy_func( if (succ) { ENTER_TRACING(); } + else { + this_instr[1].counter = restart_backoff_counter(counter); + } } else { ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); @@ -3047,7 +3049,8 @@ dummy_func( /* If the eval breaker is set then stay in tier 1. * This avoids any potentially infinite loops * involving _RESUME_CHECK */ - if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & (_PY_EVAL_EVENTS_MASK | _PY_EVAL_JIT_DO_NOT_REENTER)) { + _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_DO_NOT_REENTER); opcode = executor->vm_data.opcode; oparg = (oparg & ~255) | executor->vm_data.oparg; next_instr = this_instr; @@ -5302,6 +5305,7 @@ dummy_func( tstate->jit_exit = exit; assert(exit->is_dynamic); _PyExecutorObject *exec = exit->executor; + assert(exec->trace[0].opcode == _COLD_DYNAMIC_EXIT || exec->trace[0].opcode == _START_DYNAMIC_EXECUTOR); TIER2_TO_TIER2(exec); } @@ -5412,6 +5416,7 @@ dummy_func( #ifndef _Py_JIT assert(current_executor == (_PyExecutorObject*)executor); #endif + assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor); tstate->current_executor = (PyObject *)executor; if (!current_executor->vm_data.valid) { assert(tstate->jit_exit->executor == current_executor); @@ -5470,6 +5475,11 @@ dummy_func( PyCodeObject *code = _PyFrame_GetCode(frame); executor = code->co_executors->executors[target->op.arg]; Py_INCREF(executor); + #if Py_DEBUG + if (executor->trace[2].opcode == _GUARD_EXECUTOR_IP) { + assert(executor->trace[2].operand0 == (uint64_t)target); + } + #endif } else { if (frame->owner >= FRAME_OWNED_BY_INTERPRETER) { @@ -5486,10 +5496,10 @@ dummy_func( // The invariant in the optimizer is the deopt target always points back to the first EXTENDED_ARG. // So setting it to anything else is wrong. int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg, false); + exit->temperature = restart_backoff_counter(exit->temperature); if (succ) { GOTO_TIER_ONE_CONTINUE_TRACING(target); } - exit->temperature = restart_backoff_counter(exit->temperature); GOTO_TIER_ONE(target); } assert(tstate->jit_exit == exit); @@ -5500,18 +5510,24 @@ dummy_func( tier2 op(_COLD_DYNAMIC_EXIT, ( -- )) { _PyExitData *exit = tstate->jit_exit; assert(exit != NULL); + assert(exit->is_dynamic); _Py_CODEUNIT *target = frame->instr_ptr; _Py_BackoffCounter temperature = exit->temperature; _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { PyCodeObject *code = _PyFrame_GetCode(frame); executor = code->co_executors->executors[target->op.arg]; - if (executor->trace[0].opcode == _START_DYNAMIC_EXECUTOR && executor->vm_data.valid) { + if (executor->vm_data.valid && + executor->trace[0].opcode == _START_DYNAMIC_EXECUTOR) { + assert(executor->trace[2].operand0 == (uint64_t)target); Py_INCREF(executor); assert(tstate->jit_exit == exit); exit->executor = executor; TIER2_TO_TIER2(executor); } + else { + GOTO_TIER_ONE(target); + } } if (frame->owner >= FRAME_OWNED_BY_INTERPRETER) { GOTO_TIER_ONE(target); @@ -5524,15 +5540,18 @@ dummy_func( assert(tstate->current_executor == (PyObject *)previous_executor); int chain_depth = previous_executor->vm_data.chain_depth + 1; int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg, true); + exit->temperature = restart_backoff_counter(exit->temperature); if (succ) { GOTO_TIER_ONE_CONTINUE_TRACING(target); } - exit->temperature = restart_backoff_counter(exit->temperature); GOTO_TIER_ONE(target); } tier2 op(_GUARD_EXECUTOR_IP, (ip/4 --)) { - EXIT_IF(frame->instr_ptr != (_Py_CODEUNIT*)ip); + if (frame->instr_ptr != (_Py_CODEUNIT*)ip) { + _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_DO_NOT_REENTER); + EXIT_IF(true); + } } tier2 op(_GUARD_IP__PUSH_FRAME, (ip/4 --)) { diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index b32f11652bc4..e12040bcc848 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -401,14 +401,9 @@ do { \ next_instr = frame->instr_ptr + 1; \ JUMP_TO_LABEL(error); \ } \ - /* No progress made */ \ - if (next_instr == this_instr) { \ - opcode = executor->vm_data.opcode; \ - oparg = (oparg & ~255) | executor->vm_data.oparg; \ - if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) { \ - PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); \ - } \ - DISPATCH_GOTO(); \ + /* Progress made */ \ + if (next_instr != this_instr) { \ + _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_DO_NOT_REENTER); \ } \ if (keep_tracing_bit) { \ assert(tstate->interp->jit_state.code_curr_size == 2 || tstate->interp->jit_state.code_curr_size == 3); \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d613e4fa67f0..c45bf72033b6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7150,6 +7150,7 @@ tstate->jit_exit = exit; assert(exit->is_dynamic); _PyExecutorObject *exec = exit->executor; + assert(exec->trace[0].opcode == _COLD_DYNAMIC_EXIT || exec->trace[0].opcode == _START_DYNAMIC_EXECUTOR); TIER2_TO_TIER2(exec); break; } @@ -7445,6 +7446,7 @@ #ifndef _Py_JIT assert(current_executor == (_PyExecutorObject*)executor); #endif + assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor); tstate->current_executor = (PyObject *)executor; if (!current_executor->vm_data.valid) { assert(tstate->jit_exit->executor == current_executor); @@ -7521,6 +7523,11 @@ PyCodeObject *code = _PyFrame_GetCode(frame); executor = code->co_executors->executors[target->op.arg]; Py_INCREF(executor); + #if Py_DEBUG + if (executor->trace[2].opcode == _GUARD_EXECUTOR_IP) { + assert(executor->trace[2].operand0 == (uint64_t)target); + } + #endif } else { if (frame->owner >= FRAME_OWNED_BY_INTERPRETER) { @@ -7534,10 +7541,10 @@ assert(tstate->current_executor == (PyObject *)previous_executor); int chain_depth = previous_executor->vm_data.chain_depth + 1; int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg, false); + exit->temperature = restart_backoff_counter(exit->temperature); if (succ) { GOTO_TIER_ONE_CONTINUE_TRACING(target); } - exit->temperature = restart_backoff_counter(exit->temperature); GOTO_TIER_ONE(target); } assert(tstate->jit_exit == exit); @@ -7549,18 +7556,24 @@ case _COLD_DYNAMIC_EXIT: { _PyExitData *exit = tstate->jit_exit; assert(exit != NULL); + assert(exit->is_dynamic); _Py_CODEUNIT *target = frame->instr_ptr; _Py_BackoffCounter temperature = exit->temperature; _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { PyCodeObject *code = _PyFrame_GetCode(frame); executor = code->co_executors->executors[target->op.arg]; - if (executor->trace[0].opcode == _START_DYNAMIC_EXECUTOR && executor->vm_data.valid) { + if (executor->vm_data.valid && + executor->trace[0].opcode == _START_DYNAMIC_EXECUTOR) { + assert(executor->trace[2].operand0 == (uint64_t)target); Py_INCREF(executor); assert(tstate->jit_exit == exit); exit->executor = executor; TIER2_TO_TIER2(executor); } + else { + GOTO_TIER_ONE(target); + } } if (frame->owner >= FRAME_OWNED_BY_INTERPRETER) { GOTO_TIER_ONE(target); @@ -7573,10 +7586,10 @@ assert(tstate->current_executor == (PyObject *)previous_executor); int chain_depth = previous_executor->vm_data.chain_depth + 1; int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg, true); + exit->temperature = restart_backoff_counter(exit->temperature); if (succ) { GOTO_TIER_ONE_CONTINUE_TRACING(target); } - exit->temperature = restart_backoff_counter(exit->temperature); GOTO_TIER_ONE(target); break; } @@ -7584,8 +7597,11 @@ case _GUARD_EXECUTOR_IP: { PyObject *ip = (PyObject *)CURRENT_OPERAND0(); if (frame->instr_ptr != (_Py_CODEUNIT*)ip) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); + _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_DO_NOT_REENTER); + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } } break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index b5d95fd19ae4..78e8422731ea 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5509,7 +5509,8 @@ assert(executor->vm_data.code == code); assert(executor->vm_data.valid); assert(tstate->current_executor == NULL); - if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & (_PY_EVAL_EVENTS_MASK | _PY_EVAL_JIT_DO_NOT_REENTER)) { + _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_DO_NOT_REENTER); opcode = executor->vm_data.opcode; oparg = (oparg & ~255) | executor->vm_data.oparg; next_instr = this_instr; @@ -7720,6 +7721,9 @@ if (succ) { ENTER_TRACING(); } + else { + this_instr[1].counter = restart_backoff_counter(counter); + } } else { ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); diff --git a/Python/optimizer.c b/Python/optimizer.c index d4578b5e2bde..e507607f6ff3 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -969,7 +969,9 @@ _PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, _ #endif if (is_dynamic_target) { - add_to_trace(tstate->interp->jit_state.code_buffer, 0, _START_DYNAMIC_EXECUTOR, 0, (uintptr_t)insert_exec_instr, INSTR_IP(insert_exec_instr, code)); + assert(curr_instr == frame->instr_ptr); + assert(curr_instr == insert_exec_instr); + add_to_trace(tstate->interp->jit_state.code_buffer, 0, _START_DYNAMIC_EXECUTOR, 0, (uintptr_t)insert_exec_instr, 0); add_to_trace(tstate->interp->jit_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0); add_to_trace(tstate->interp->jit_state.code_buffer, 2, _GUARD_EXECUTOR_IP, 0, (uintptr_t)curr_instr, 0); tstate->interp->jit_state.code_curr_size = 3; @@ -1186,6 +1188,9 @@ sanity_check(_PyExecutorObject *executor) executor->trace[0].opcode == _COLD_EXIT || executor->trace[0].opcode == _COLD_DYNAMIC_EXIT || executor->trace[0].opcode == _START_DYNAMIC_EXECUTOR); + if (executor->trace[0].opcode == _START_DYNAMIC_EXECUTOR) { + CHECK(executor->trace[2].opcode == _GUARD_EXECUTOR_IP); + } for (; i < executor->code_size; i++) { const _PyUOpInstruction *inst = &executor->trace[i]; uint16_t opcode = inst->opcode; @@ -1579,6 +1584,7 @@ _PyExecutor_GetColdDynamicExecutor(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); if (interp->cold_dynamic_executor != NULL) { + assert(interp->cold_dynamic_executor->trace[0].opcode == _COLD_DYNAMIC_EXIT); return interp->cold_dynamic_executor; } _PyExecutorObject *cold = allocate_executor(0, 1); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index b96e472d5378..5bba28578bfe 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -700,6 +700,8 @@ NON_ESCAPING_FUNCTIONS = ( "_PyLong_CheckExactAndCompact", "_PyExecutor_FromExit", "_PyJit_TryInitializeTracing", + "_Py_unset_eval_breaker_bit", + "_Py_set_eval_breaker_bit", ) -- 2.47.3