From f55129ee4281ac93296a0b246bc48f0f6d9d2579 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 17 Oct 2025 00:24:54 +0100 Subject: [PATCH] fix recursive tracing --- Include/internal/pycore_interp_structs.h | 4 +- Python/ceval.c | 35 ++++++----- Python/ceval_macros.h | 19 +----- Python/optimizer.c | 74 +++++++++++++----------- 4 files changed, 65 insertions(+), 67 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index af715518c077..e3264035ed9b 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -944,7 +944,6 @@ struct _is { struct callable_cache callable_cache; PyObject *common_consts[NUM_COMMON_CONSTANTS]; // JIT tracing state - bool jit_is_tracing; int jit_tracer_code_max_size; int jit_tracer_code_curr_size; _PyBloomFilter jit_tracer_dependencies; @@ -955,7 +954,8 @@ struct _is { PyCodeObject *jit_tracer_initial_code; // Strong PyFunctionObject *jit_tracer_initial_func; // Strong struct _PyExitData *jit_tracer_previous_exit; - bool jit_completed_loop; + _PyInterpreterFrame *jit_tracer_current_frame; + // End Jit tracing state bool jit; bool compiling; struct _PyUOpInstruction *jit_uop_buffer; diff --git a/Python/ceval.c b/Python/ceval.c index bf1616b04bf5..c08e5830744e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1049,12 +1049,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int return NULL; } - // We came in already tracing, this means a recursive trace in the form of - // Python -> C -> Python happened. We want to abandon the outer trace then. - if (IS_JIT_TRACING()) { - _PyJIT_FinalizeTracing(tstate); - tstate->interp->jit_is_tracing = false; - } /* Local "register" variables. * These are cached values from the frame and code object. */ _Py_CODEUNIT *next_instr; @@ -1201,14 +1195,27 @@ tier2_dispatch: uopcode = next_uop->opcode; #ifdef Py_DEBUG if (frame->lltrace >= 2) { - // if (next_uop->opcode != _YIELD_VALUE && - // next_uop->opcode != _FOR_ITER_GEN_FRAME && - // next_uop->opcode != _PUSH_FRAME && - // next_uop->opcode != _PY_FRAME_KW && - // next_uop->opcode != _SAVE_RETURN_OFFSET && - // next_uop->opcode != _SAVE_RETURN_OFFSET) { - // dump_stack(frame, stack_pointer); - // } + if (next_uop->opcode != _YIELD_VALUE && + next_uop->opcode != _FOR_ITER_GEN_FRAME && + next_uop->opcode != _PUSH_FRAME && + next_uop->opcode != _PY_FRAME_KW && + next_uop->opcode != _SAVE_RETURN_OFFSET && + next_uop->opcode != _SAVE_RETURN_OFFSET) { + if (next_uop->opcode != _START_EXECUTOR) { + if (next_uop->format == UOP_FORMAT_TARGET) { + _Py_CODEUNIT *aim = _PyFrame_GetBytecode(frame) + next_uop->target; + printf(" aim=[%s]\n", _PyOpcode_OpName[aim->op.code]); + } + else if (next_uop->format == UOP_FORMAT_JUMP) { + _PyUOpInstruction *aim_uop = current_executor->trace + next_uop->jump_target; + if (aim_uop->format == UOP_FORMAT_TARGET) { + _Py_CODEUNIT *aim = _PyFrame_GetBytecode(frame) + aim_uop->target; + printf(" aim=[%s]\n", _PyOpcode_OpName[aim->op.code]); + } + } + } + dump_stack(frame, stack_pointer); + } if (next_uop->opcode == _START_EXECUTOR) { printf("%4d uop: ", 0); } diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 8085fb3f1928..ff038026f977 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -134,25 +134,14 @@ RECORD_JUMP_TAKEN() \ RECORD_TRACE_NO_DISPATCH() \ assert(!IS_JIT_TRACING()); \ - RELOAD_TRACING(); \ JUMP_TO_LABEL(label); #if _Py_TAIL_CALL_INTERP || USE_COMPUTED_GOTOS -# define IS_JIT_TRACING() (tstate->interp->jit_is_tracing) -# define SET_TRACING() \ - DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE; +# define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE) # define ENTER_TRACING() \ - SET_TRACING(); \ - tstate->interp->jit_is_tracing = true; -# define UNSET_TRACING() \ - DISPATCH_TABLE_VAR = DISPATCH_TABLE; + DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE; # define LEAVE_TRACING() \ - assert(IS_JIT_TRACING()); \ - UNSET_TRACING(); \ - tstate->interp->jit_is_tracing = false; -// This handles recursive tracing over C calls. -# define RELOAD_TRACING() \ - DISPATCH_TABLE_VAR = tstate->interp->jit_is_tracing ? TRACING_DISPATCH_TABLE : DISPATCH_TABLE; + DISPATCH_TABLE_VAR = DISPATCH_TABLE; # define BAIL_TRACING_NO_DISPATCH() \ LEAVE_TRACING(); \ _PyFrame_SetStackPointer(frame, stack_pointer); \ @@ -224,14 +213,12 @@ do { \ } while (0) #define TRACING_DISPATCH_INLINED(NEW_FRAME) \ - RELOAD_TRACING(); \ RECORD_TRACE_NO_DISPATCH(); \ DISPATCH_INLINED(NEW_FRAME); #define TRACING_DISPATCH() \ { \ assert(frame->stackpointer == NULL); \ - RELOAD_TRACING(); \ RECORD_TRACE_NO_DISPATCH(); \ NEXTOPARG(); \ PRE_DISPATCH_GOTO(); \ diff --git a/Python/optimizer.c b/Python/optimizer.c index 532881aef92f..ed0e92bceb9c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -562,7 +562,8 @@ _PyJIT_translate_single_bytecode_to_trace( if (Py_IsNone((PyObject *)func)) { func = NULL; } - int is_first_instr = tstate->interp->jit_tracer_initial_instr == this_instr ; + + int is_first_instr = tstate->interp->jit_tracer_initial_instr == this_instr; bool progress_needed = (tstate->interp->jit_tracer_initial_chain_depth % MAX_CHAIN_DEPTH) == 0 && is_first_instr;; _PyBloomFilter *dependencies = &tstate->interp->jit_tracer_dependencies; _Py_BloomFilter_Add(dependencies, old_code); @@ -583,7 +584,39 @@ _PyJIT_translate_single_bytecode_to_trace( target = INSTR_IP(target_instr, old_code); - DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg); + bool needs_guard_ip = _PyOpcode_NeedsGuardIp[opcode] && + !(opcode == FOR_ITER_RANGE || opcode == FOR_ITER_LIST || opcode == FOR_ITER_TUPLE) && + !(opcode == JUMP_BACKWARD_NO_INTERRUPT || opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_JIT) && + !(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_NONE || opcode == POP_JUMP_IF_NOT_NONE); + + // Strange control-flow, unsupported opcode, etc. + if (jump_taken || + // This happens when a recursive call happens that we can't trace. Such as Python -> C -> Python calls + // If we haven't guarded the IP, then it's untraceable. + (frame != tstate->interp->jit_tracer_current_frame && !needs_guard_ip) || + // TODO handle extended args. + oparg > 255 || opcode == EXTENDED_ARG || + opcode == WITH_EXCEPT_START || opcode == RERAISE || opcode == CLEANUP_THROW || opcode == PUSH_EXC_INFO || + frame->owner >= FRAME_OWNED_BY_INTERPRETER || + // This can be supported, but requires a tracing shim frame. + opcode == CALL_ALLOC_AND_ENTER_INIT) { + unsupported: + { + // Rewind to previous instruction and replace with _EXIT_TRACE. + _PyUOpInstruction *curr = &trace[trace_length-1]; + while (curr->opcode != _SET_IP && trace_length > 1) { + trace_length--; + curr = &trace[trace_length-1]; + } + assert(curr->opcode == _SET_IP || trace_length == 1); + curr->opcode = _EXIT_TRACE; + goto done; + } + } + + tstate->interp->jit_tracer_current_frame = frame; + + DPRINTF(2, "%p %d: %s(%d)\n", old_code, target, _PyOpcode_OpName[opcode], oparg); if (opcode == NOP) { return 1; @@ -601,47 +634,18 @@ _PyJIT_translate_single_bytecode_to_trace( max_length -= 2; if (opcode == ENTER_EXECUTOR) { - ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); - ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target); - ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); goto full; } - bool needs_guard_ip = _PyOpcode_NeedsGuardIp[opcode] && - !(opcode == FOR_ITER_RANGE || opcode == FOR_ITER_LIST || opcode == FOR_ITER_TUPLE) && - !(opcode == JUMP_BACKWARD_NO_INTERRUPT || opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_JIT) && - !(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_NONE || opcode == POP_JUMP_IF_NOT_NONE); - - assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); - const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; - // Strange control-flow, unsupported opcode, etc. - if (jump_taken || - // TODO handle extended args. - oparg > 255 || - opcode == EXTENDED_ARG || - opcode == WITH_EXCEPT_START || opcode == RERAISE || opcode == CLEANUP_THROW || opcode == PUSH_EXC_INFO || - frame->owner >= FRAME_OWNED_BY_INTERPRETER || - // This can be supported, but requires a tracing shim frame. - opcode == CALL_ALLOC_AND_ENTER_INIT) { - unsupported: - { - // Rewind to previous instruction and replace with _EXIT_TRACE. - _PyUOpInstruction *curr = &trace[trace_length-1]; - while (curr->opcode != _SET_IP && trace_length > 1) { - trace_length--; - curr = &trace[trace_length-1]; - } - assert(curr->opcode == _SET_IP || trace_length == 1); - curr->opcode = _EXIT_TRACE; - goto done; - } - } RESERVE_RAW(expansion->nuops + needs_guard_ip + 3, "uop and various checks"); ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); + assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); + assert(!_PyErr_Occurred(tstate)); + if (!OPCODE_HAS_NO_SAVE_IP(opcode)) { ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target); } @@ -851,9 +855,9 @@ _PyJIT_InitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_ tstate->interp->jit_tracer_initial_func = (PyFunctionObject *)Py_NewRef(_PyFrame_GetFunction(frame)); tstate->interp->jit_tracer_previous_exit = exit; memset(&tstate->interp->jit_tracer_dependencies.bits, 0, sizeof(tstate->interp->jit_tracer_dependencies.bits)); - tstate->interp->jit_completed_loop = false; tstate->interp->jit_tracer_initial_stack_depth = curr_stackdepth; tstate->interp->jit_tracer_initial_chain_depth = chain_depth; + tstate->interp->jit_tracer_current_frame = frame; } void -- 2.47.3