From 9a66605da2b8f3328260213172cd20b79d12ca88 Mon Sep 17 00:00:00 2001
From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Date: Tue, 23 Sep 2025 00:05:28 +0100
Subject: [PATCH] Fix chain depth bug

Tracing started from a cold side exit always passed chain_depth = 0 to
_PyJIT_InitializeTracing(), so every trace started from that path was
recorded at depth 0. Seed the depth from the executor that is being
exited instead (previous_executor->vm_data.chain_depth + 1), and keep 0
for dynamic exits.

In _PyOptimizer_Optimize(), stop reducing chain_depth modulo
MAX_CHAIN_DEPTH in place, so the executor records the depth it was
given. Only insert the new executor into the bytecode when
progress_needed is set; otherwise clear executor->vm_data.code.

In _PyJIT_translate_single_bytecode_to_trace(), compute is_first_instr
once and require it for progress_needed.

Also assert that an exit taken via TIER2_TO_TIER2() is not dynamic.
---
Review notes (not part of the commit message):
 * The parent depth is read from previous_executor, which is already
   asserted to be tstate->current_executor, rather than being routed
   through the shared cold executor. That removes the need for a
   chain_depth parameter on make_executor_from_uops() and for the
   debug fprintf(stdout, "CHAIN DEPTH ...") that accompanied it.
 * Removed a doubled ';' and made is_first_instr a bool to match
   progress_needed.
 * The blob ids on the "index" lines are those of the original patch,
   and context-line indentation was reconstructed; regenerate with
   git format-patch once applied (git apply --ignore-whitespace may be
   needed).

 Python/bytecodes.c        |  3 ++-
 Python/executor_cases.c.h |  3 ++-
 Python/optimizer.c        | 39 ++++++++++++++++++++++-----------------
 3 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 43f81bfdf746..e0150aafaf52 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -5289,6 +5289,7 @@ dummy_func(
             }
             #endif
             tstate->jit_exit = exit;
+            assert(!exit->is_dynamic);
             TIER2_TO_TIER2(exit->executor);
         }
 
@@ -5454,7 +5455,7 @@ dummy_func(
             }
             _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit);
             assert(tstate->current_executor == (PyObject *)previous_executor);
-            int chain_depth = 0;
+            int chain_depth = is_dynamic ? 0 : previous_executor->vm_data.chain_depth + 1;
             _PyJIT_InitializeTracing(tstate, frame, target, STACK_LEVEL(), chain_depth);
             GOTO_TIER_ONE(target, 1);
         }
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index f2a100210d4b..56a1c9b7a52e 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -7178,6 +7178,7 @@
             }
             #endif
             tstate->jit_exit = exit;
+            assert(!exit->is_dynamic);
             TIER2_TO_TIER2(exit->executor);
             break;
         }
@@ -7538,7 +7539,7 @@
             }
             _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit);
             assert(tstate->current_executor == (PyObject *)previous_executor);
-            int chain_depth = 0;
+            int chain_depth = is_dynamic ? 0 : previous_executor->vm_data.chain_depth + 1;
             _PyJIT_InitializeTracing(tstate, frame, target, STACK_LEVEL(), chain_depth);
             GOTO_TIER_ONE(target, 1);
         }
diff --git a/Python/optimizer.c b/Python/optimizer.c
index c7d8784f2bef..5ab6ef4e91bf 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -128,8 +128,7 @@ _PyOptimizer_Optimize(
     // make progress in order to avoid infinite loops or excessively-long
     // side-exit chains. We can only insert the executor into the bytecode if
     // this is true, since a deopt won't infinitely re-enter the executor:
-    chain_depth %= MAX_CHAIN_DEPTH;
-    bool progress_needed = chain_depth == 0;
+    bool progress_needed = (chain_depth % MAX_CHAIN_DEPTH) == 0;
     PyCodeObject *code = (PyCodeObject *)tstate->interp->jit_tracer_initial_func->func_code;
     assert(PyCode_Check(code));
     _Py_CODEUNIT *start = tstate->interp->jit_tracer_initial_instr;
@@ -144,19 +143,24 @@ _PyOptimizer_Optimize(
         return err;
     }
     assert(executor != NULL);
-    int index = get_index_for_executor(code, start);
-    if (index < 0) {
-        /* Out of memory. Don't raise and assume that the
-         * error will show up elsewhere.
-         *
-         * If an optimizer has already produced an executor,
-         * it might get confused by the executor disappearing,
-         * but there is not much we can do about that here. */
-        Py_DECREF(executor);
-        interp->compiling = false;
-        return 0;
+    if (progress_needed) {
+        int index = get_index_for_executor(code, start);
+        if (index < 0) {
+            /* Out of memory. Don't raise and assume that the
+             * error will show up elsewhere.
+             *
+             * If an optimizer has already produced an executor,
+             * it might get confused by the executor disappearing,
+             * but there is not much we can do about that here. */
+            Py_DECREF(executor);
+            interp->compiling = false;
+            return 0;
+        }
+        insert_executor(code, start, index, executor);
+    }
+    else {
+        executor->vm_data.code = NULL;
     }
-    insert_executor(code, start, index, executor);
     executor->vm_data.chain_depth = chain_depth;
     assert(executor->vm_data.valid);
     interp->compiling = false;
@@ -544,7 +548,8 @@ _PyJIT_translate_single_bytecode_to_trace(
     if (Py_IsNone((PyObject *)func)) {
         func = NULL;
     }
-    bool progress_needed = (tstate->interp->jit_tracer_initial_chain_depth % MAX_CHAIN_DEPTH) == 0;;
+    bool is_first_instr = tstate->interp->jit_tracer_initial_instr == this_instr;
+    bool progress_needed = (tstate->interp->jit_tracer_initial_chain_depth % MAX_CHAIN_DEPTH) == 0 && is_first_instr;
     _PyBloomFilter *dependencies = &tstate->interp->jit_tracer_dependencies;
     _Py_BloomFilter_Add(dependencies, old_code);
     _Py_CODEUNIT *target_instr = this_instr;
@@ -629,7 +634,7 @@ _PyJIT_translate_single_bytecode_to_trace(
 
     /* Special case the first instruction,
     * so that we can guarantee forward progress */
-    if (progress_needed && tstate->interp->jit_tracer_initial_instr == this_instr && tstate->interp->jit_tracer_code_curr_size == 0) {
+    if (progress_needed && is_first_instr && tstate->interp->jit_tracer_code_curr_size == 0) {
         if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
             opcode = _PyOpcode_Deopt[opcode];
         }
@@ -638,7 +643,7 @@ _PyJIT_translate_single_bytecode_to_trace(
     }
 
     // Loop back to the start
-    if (tstate->interp->jit_tracer_initial_instr == this_instr && tstate->interp->jit_tracer_code_curr_size > 2) {
+    if (is_first_instr && tstate->interp->jit_tracer_code_curr_size > 2) {
         // Undo the last few instructions.
         trace_length = tstate->interp->jit_tracer_code_curr_size;
         ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
-- 
2.47.3