From c75d91c5942a0fc2c3cd84d7801091b59516f5e2 Mon Sep 17 00:00:00 2001
From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Date: Wed, 5 Nov 2025 22:34:24 +0000
Subject: [PATCH] Don't limit control-flow exits

Side exits taken at control-flow guards (_GUARD_IS_TRUE_POP,
_GUARD_IS_FALSE_POP and the uops listed in is_for_iter_test, which now
includes _FOR_ITER_TIER_TWO) are ordinary branches of the program, not
deopts.  The same goes for the _EXIT_TRACE uops the tracer appends itself
when it stops at an inner-loop JUMP_BACKWARD or on the `full:` path.

Carry that information in operand1 of the exit uop, copy it into a new
_PyExitData.is_control_flow flag, and stop incrementing chain_depth when a
new trace is started from such an exit.  The lltrace "SIDE EXIT" message
prints the flag as well.
---
Review changes relative to the original posting:
 * declare both flags as uint8_t bit-fields; the signedness of a plain
   `char` bit-field is implementation-defined
 * store `operand1 != 0` instead of truncating the 64-bit operand with a
   (char) cast
 * reword the chain_depth comment in bytecodes.c

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch, and the blob hashes on the
`index` lines still describe the original posting.  Check the context
against the tree before applying, or use `git apply --ignore-whitespace`.

 Include/internal/pycore_optimizer.h |  3 ++-
 Python/bytecodes.c                  |  8 +++++---
 Python/executor_cases.c.h           |  6 +++---
 Python/optimizer.c                  | 12 +++++++++---
 4 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index d50e7ccfdd42..a1d95c7c2f4a 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -37,7 +37,8 @@ typedef struct {
 typedef struct _PyExitData {
     uint32_t target;
     uint16_t index;
-    char is_dynamic;
+    uint8_t is_dynamic:4;
+    uint8_t is_control_flow:4;
     _Py_BackoffCounter temperature;
     struct _PyExecutorObject *executor;
 } _PyExitData;
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 3fcfab5ef159..47b73e30971a 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -5276,10 +5276,10 @@ dummy_func(
             if (frame->lltrace >= 2) {
                 printf("SIDE EXIT: [UOp ");
                 _PyUOpPrint(&next_uop[-1]);
-                printf(", exit %tu, temp %d, target %d -> %s]\n",
+                printf(", exit %tu, temp %d, target %d -> %s, is_control_flow %d]\n",
                        exit - current_executor->exits, exit->temperature.value_and_backoff,
                        (int)(target - _PyFrame_GetBytecode(frame)),
-                       _PyOpcode_OpName[target->op.code]);
+                       _PyOpcode_OpName[target->op.code], exit->is_control_flow);
             }
         #endif
             tstate->jit_exit = exit;
@@ -5486,7 +5486,9 @@ dummy_func(
             }
             _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit);
             assert(tstate->current_executor == (PyObject *)previous_executor);
-            int chain_depth = previous_executor->vm_data.chain_depth + 1;
+            // Exits taken at control-flow guards are ordinary branches of the program, not
+            // deopts, so a trace started from one does not count towards the chain depth.
+            int chain_depth = previous_executor->vm_data.chain_depth + !exit->is_control_flow;
             // Note: it's safe to use target->op.arg here instead of the oparg given by EXTENDED_ARG.
             // The invariant in the optimizer is the deopt target always points back to the first EXTENDED_ARG.
             // So setting it to anything else is wrong.
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 826c8fc9af69..57dc7c6902c3 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -7118,10 +7118,10 @@
                 _PyFrame_SetStackPointer(frame, stack_pointer);
                 printf("SIDE EXIT: [UOp ");
                 _PyUOpPrint(&next_uop[-1]);
-                printf(", exit %tu, temp %d, target %d -> %s]\n",
+                printf(", exit %tu, temp %d, target %d -> %s, is_control_flow %d]\n",
                        exit - current_executor->exits, exit->temperature.value_and_backoff,
                        (int)(target - _PyFrame_GetBytecode(frame)),
-                       _PyOpcode_OpName[target->op.code]);
+                       _PyOpcode_OpName[target->op.code], exit->is_control_flow);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
             }
             #endif
@@ -7534,7 +7534,7 @@
             }
             _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit);
             assert(tstate->current_executor == (PyObject *)previous_executor);
-            int chain_depth = previous_executor->vm_data.chain_depth + 1;
+            int chain_depth = previous_executor->vm_data.chain_depth + !exit->is_control_flow;
             int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg);
             exit->temperature = restart_backoff_counter(exit->temperature);
             if (succ) {
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 268a3f3c3b64..6531cd64bbd6 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -470,6 +470,7 @@ is_for_iter_test[MAX_UOP_ID + 1] = {
     [_GUARD_NOT_EXHAUSTED_RANGE] = 1,
     [_GUARD_NOT_EXHAUSTED_LIST] = 1,
     [_GUARD_NOT_EXHAUSTED_TUPLE] = 1,
+    [_FOR_ITER_TIER_TWO] = 1,
 };

 static const uint16_t
@@ -757,6 +758,7 @@ _PyJit_translate_single_bytecode_to_trace(
                 // inner loop might start and let the traces rejoin.
                 OPT_STAT_INC(inner_loop);
                 ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+                trace[trace_length-1].operand1 = true; // is_control_flow
                 DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr, tstate->interp->jit_state.close_loop_instr, tstate->interp->jit_state.insert_exec_instr);
                 goto done;
             }
@@ -933,6 +935,7 @@ full:
         // We previously reversed one.
         max_length += 1;
         ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+        trace[trace_length-1].operand1 = true; // is_control_flow
     }
     tstate->interp->jit_state.code_curr_size = trace_length;
     tstate->interp->jit_state.code_max_size = max_length;
@@ -1033,13 +1036,14 @@ count_exits(_PyUOpInstruction *buffer, int length)
     return exit_count;
 }

-static void make_exit(_PyUOpInstruction *inst, int opcode, int target)
+static void make_exit(_PyUOpInstruction *inst, int opcode, int target, bool is_control_flow)
 {
     inst->opcode = opcode;
     inst->oparg = 0;
     inst->operand0 = 0;
     inst->format = UOP_FORMAT_TARGET;
     inst->target = target;
+    inst->operand1 = is_control_flow;
 #ifdef Py_STATS
     inst->execution_count = 0;
 #endif
@@ -1096,8 +1100,9 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
                 exit_op = _DYNAMIC_EXIT;
                 unique_target = true;
             }
+            bool is_control_flow = (opcode == _GUARD_IS_FALSE_POP || opcode == _GUARD_IS_TRUE_POP || is_for_iter_test[opcode]);
             if (unique_target || jump_target != current_jump_target || current_exit_op != exit_op) {
-                make_exit(&buffer[next_spare], exit_op, jump_target);
+                make_exit(&buffer[next_spare], exit_op, jump_target, is_control_flow);
                 current_exit_op = exit_op;
                 current_jump_target = jump_target;
                 current_jump = next_spare;
@@ -1113,7 +1118,7 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
                 current_popped = popped;
                 current_error = next_spare;
                 current_error_target = target;
-                make_exit(&buffer[next_spare], _ERROR_POP_N, 0);
+                make_exit(&buffer[next_spare], _ERROR_POP_N, 0, false);
                 buffer[next_spare].operand0 = target;
                 next_spare++;
             }
@@ -1250,6 +1255,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
             dest->operand0 = (uint64_t)exit;
             exit->executor = opcode == _EXIT_TRACE ? cold : cold_dynamic;
             exit->is_dynamic = (char)(opcode == _DYNAMIC_EXIT);
+            exit->is_control_flow = (buffer[i].operand1 != 0);
             next_exit--;
         }
     }
-- 
2.47.3