From 7d17741473d641cdddd3626c032e93479bc98cbd Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:21:29 +0100 Subject: [PATCH] First half of reviews --- Include/internal/pycore_interp_structs.h | 6 ++-- Include/internal/pycore_opcode_metadata.h | 23 +++++---------- Include/internal/pycore_optimizer.h | 1 - Python/bytecodes.c | 13 ++++----- Python/ceval_macros.h | 4 +-- Python/executor_cases.c.h | 3 +- Python/generated_cases.c.h | 2 +- Python/generated_tracer_cases.c.h | 12 ++++---- Python/optimizer.c | 33 ++++++++-------------- Python/optimizer_analysis.c | 6 +++- Tools/cases_generator/analyzer.py | 2 +- Tools/cases_generator/generators_common.py | 4 +-- Tools/cases_generator/tracer_generator.py | 2 +- 13 files changed, 46 insertions(+), 65 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 142bcbece622..5285233b6b13 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -757,7 +757,7 @@ struct _Py_unique_id_pool { typedef _Py_CODEUNIT *(*_PyJitEntryFuncPtr)(struct _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate); -typedef struct _PyJitState { +typedef struct _PyJitTracerState { int jit_tracer_code_max_size; int jit_tracer_code_curr_size; _PyBloomFilter jit_tracer_dependencies; @@ -772,7 +772,7 @@ typedef struct _PyJitState { PyFunctionObject *jit_tracer_initial_func; // Strong struct _PyExitData *jit_tracer_previous_exit; _PyInterpreterFrame *jit_tracer_current_frame; -} _PyJitState; +} _PyJitTracerState; /* PyInterpreterState holds the global state for one of the runtime's interpreters. Typically the initial (main) interpreter is the only one. @@ -949,7 +949,7 @@ struct _is { struct types_state types; struct callable_cache callable_cache; PyObject *common_consts[NUM_COMMON_CONSTANTS]; - _PyJitState jit_state; + _PyJitTracerState jit_state; bool jit; bool compiling; struct _PyExecutorObject *executor_list_head; diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 5d642604515e..e8e0c59874a5 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1777,30 +1777,17 @@ const uint8_t _PyOpcode_NeedsGuardIp[256] = { [INTERPRETER_EXIT] = 1, [RETURN_VALUE] = 1, [YIELD_VALUE] = 1, - [JUMP_FORWARD] = 1, - [JUMP_BACKWARD_NO_INTERRUPT] = 1, - [INSTRUMENTED_FOR_ITER] = 1, + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = 1, [RETURN_GENERATOR] = 1, [BINARY_OP_SUBSCR_GETITEM] = 1, [INSTRUMENTED_RETURN_VALUE] = 1, [SEND] = 1, [SEND_GEN] = 1, [INSTRUMENTED_YIELD_VALUE] = 1, - [INSTRUMENTED_END_ASYNC_FOR] = 1, - [END_ASYNC_FOR] = 1, [LOAD_ATTR_PROPERTY] = 1, - [JUMP_BACKWARD] = 1, - [JUMP_BACKWARD_NO_JIT] = 1, - [JUMP_BACKWARD_JIT] = 1, - [POP_JUMP_IF_TRUE] = 1, - [POP_JUMP_IF_FALSE] = 1, - [POP_JUMP_IF_NONE] = 1, - [POP_JUMP_IF_NOT_NONE] = 1, - [FOR_ITER] = 1, - [FOR_ITER_LIST] = 1, - [FOR_ITER_TUPLE] = 1, - [FOR_ITER_RANGE] = 1, [FOR_ITER_GEN] = 1, + [CALL] = 1, + [INSTRUMENTED_CALL] = 1, [CALL_PY_GENERAL] = 1, [CALL_BOUND_METHOD_GENERAL] = 1, [CALL_BOUND_METHOD_EXACT_ARGS] = 1, @@ -1808,6 +1795,10 @@ const uint8_t _PyOpcode_NeedsGuardIp[256] = { [CALL_ALLOC_AND_ENTER_INIT] = 1, [CALL_KW_PY] = 1, [CALL_KW_BOUND_METHOD] = 1, + [CALL_KW] = 1, + [INSTRUMENTED_CALL_KW] = 1, + [CALL_FUNCTION_EX] = 1, + [INSTRUMENTED_CALL_FUNCTION_EX] = 1, }; #endif diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index b06593377324..0acf19402769 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -37,7 +37,6 @@ typedef struct { typedef struct _PyExitData { uint32_t target; uint16_t index; - char is_dynamic; _Py_BackoffCounter temperature; struct _PyExecutorObject *executor; } _PyExitData; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7df899dc50e8..ab5649281d90 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1378,7 +1378,7 @@ dummy_func( if (err == 0) { assert(retval_o != NULL); JUMPBY(oparg); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); } else { PyStackRef_CLOSE(v); @@ -3241,7 +3241,7 @@ dummy_func( } // Jump forward by oparg and skip the following END_FOR JUMPBY(oparg + 1); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); DISPATCH(); } next = item; @@ -3303,7 +3303,7 @@ dummy_func( null_or_index = PyStackRef_TagInt(-1); /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); DISPATCH(); } #endif @@ -3381,7 +3381,7 @@ dummy_func( null_or_index = PyStackRef_TagInt(-1); /* Jump forward oparg, then skip following END_FOR instruction */ JUMPBY(oparg + 1); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); DISPATCH(); } } @@ -3426,7 +3426,7 @@ dummy_func( if (r->len <= 0) { // Jump over END_FOR instruction. JUMPBY(oparg + 1); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); DISPATCH(); } } @@ -5011,7 +5011,7 @@ dummy_func( LOAD_IP(frame->return_offset); #endif #if TIER_TWO - frame->instr_ptr += (frame->return_offset); + TIER2_STORE_IP(frame->return_offset); #endif RELOAD_STACK(); res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); @@ -5278,7 +5278,6 @@ dummy_func( tier2 op(_EXIT_TRACE, (exit_p/4 --)) { _PyExitData *exit = (_PyExitData *)exit_p; - assert(!exit->is_dynamic); #if defined(Py_DEBUG) && !defined(_Py_JIT) _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 256175b476dc..e85adf88c679 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -130,7 +130,7 @@ #endif #define TRACING_JUMP_TO_LABEL(label) \ - RECORD_JUMP_TAKEN() \ + RECORD_DYNAMIC_JUMP_TAKEN() \ RECORD_TRACE_NO_DISPATCH() \ assert(!IS_JIT_TRACING()); \ JUMP_TO_LABEL(label); @@ -381,7 +381,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { #define RECORD_BRANCH_TAKEN(bitset, flag) #endif -#define RECORD_JUMP_TAKEN() _jump_taken = 1; +#define RECORD_DYNAMIC_JUMP_TAKEN() _jump_taken = 1; #define UNBOUNDLOCAL_ERROR_MSG \ "cannot access local variable '%s' where it is not associated with a value" diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 03846f48d538..7e76e0a0719e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -6767,7 +6767,7 @@ LOAD_IP(frame->return_offset); #endif #if TIER_TWO - frame->instr_ptr += (frame->return_offset); + TIER2_STORE_IP(frame->return_offset); #endif stack_pointer = _PyFrame_GetStackPointer(frame); res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); @@ -7127,7 +7127,6 @@ case _EXIT_TRACE: { PyObject *exit_p = (PyObject *)CURRENT_OPERAND0(); _PyExitData *exit = (_PyExitData *)exit_p; - assert(!exit->is_dynamic); #if defined(Py_DEBUG) && !defined(_Py_JIT) _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 5cd7142adb33..0aeff340806f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -10388,7 +10388,7 @@ LOAD_IP(frame->return_offset); #endif #if TIER_TWO - frame->instr_ptr += (frame->return_offset); + TIER2_STORE_IP(frame->return_offset); #endif stack_pointer = _PyFrame_GetStackPointer(frame); res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); diff --git a/Python/generated_tracer_cases.c.h b/Python/generated_tracer_cases.c.h index 759bfd13cb83..123315df9e5f 100644 --- a/Python/generated_tracer_cases.c.h +++ b/Python/generated_tracer_cases.c.h @@ -6547,7 +6547,7 @@ TRACING_JUMP_TO_LABEL(error); } JUMPBY(oparg + 1); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); stack_pointer[-1] = null_or_index; TRACING_DISPATCH(); } @@ -6696,7 +6696,7 @@ if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) { null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); stack_pointer[-1] = null_or_index; TRACING_DISPATCH(); } @@ -6787,7 +6787,7 @@ STAT_INC(FOR_ITER, hit); if (r->len <= 0) { JUMPBY(oparg + 1); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); TRACING_DISPATCH(); } } @@ -6860,7 +6860,7 @@ if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyTuple_GET_SIZE(tuple_o)) { null_or_index = PyStackRef_TagInt(-1); JUMPBY(oparg + 1); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); stack_pointer[-1] = null_or_index; TRACING_DISPATCH(); } @@ -12397,7 +12397,7 @@ LOAD_IP(frame->return_offset); #endif #if TIER_TWO - frame->instr_ptr += (frame->return_offset); + TIER2_STORE_IP(frame->return_offset); #endif stack_pointer = _PyFrame_GetStackPointer(frame); res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); @@ -12548,7 +12548,7 @@ if (err == 0) { assert(retval_o != NULL); JUMPBY(oparg); - RECORD_JUMP_TAKEN(); + RECORD_DYNAMIC_JUMP_TAKEN(); } else { stack_pointer += -1; diff --git a/Python/optimizer.c b/Python/optimizer.c index ca9967e4272d..c7967d090936 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -141,13 +141,7 @@ _PyOptimizer_Optimize( interp->compiling = false; return 0; } - // We are the only one still holding a reference to this code object that - // is practically dead. - if (_PyObject_IsUniquelyReferenced((PyObject *)code) || _PyObject_IsUniquelyReferenced((PyObject *)tstate->interp->jit_state.jit_tracer_initial_func)) { - interp->compiling = false; - return 0; - } - // One of our depencies while tracing was invalidated. Not worth compiling. + // One of our dependencies while tracing was invalidated. Not worth compiling. if (!tstate->interp->jit_state.jit_tracer_dependencies_still_valid) { interp->compiling = false; return 0; @@ -603,12 +597,13 @@ _PyJIT_translate_single_bytecode_to_trace( } #endif + if (!tstate->interp->jit_state.jit_tracer_dependencies_still_valid) { + goto done; + } + DPRINTF(2, "%p %d: %s(%d) %d\n", old_code, target, _PyOpcode_OpName[opcode], oparg, progress_needed); - bool needs_guard_ip = _PyOpcode_NeedsGuardIp[opcode] && - !(opcode == FOR_ITER_RANGE || opcode == FOR_ITER_LIST || opcode == FOR_ITER_TUPLE) && - !(opcode == JUMP_BACKWARD_NO_INTERRUPT || opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_JIT) && - !(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_NONE || opcode == POP_JUMP_IF_NOT_NONE); + bool needs_guard_ip = _PyOpcode_NeedsGuardIp[opcode]; // Strange control-flow, unsupported opcode, etc. if (jump_taken || @@ -616,7 +611,7 @@ _PyJIT_translate_single_bytecode_to_trace( // If we haven't guarded the IP, then it's untraceable. (frame != tstate->interp->jit_state.jit_tracer_current_frame && !needs_guard_ip) || (oparg > 0xFFFF) || - // TODO (gh-140277): The constituent use one extra stack slot. So we need to check for heaedroom. + // TODO (gh-140277): The constituent use one extra stack slot. So we need to check for headroom. (opcode == BINARY_OP_SUBSCR_GETITEM && old_stack_level + 1 > old_code->co_stacksize)|| // Exception stuff, could be handled in the future maybe? opcode == WITH_EXCEPT_START || opcode == RERAISE || opcode == CLEANUP_THROW || opcode == PUSH_EXC_INFO || @@ -1155,7 +1150,6 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil _PyExitData *exit = &executor->exits[next_exit]; exit->target = buffer[i].target; dest->operand0 = (uint64_t)exit; - exit->is_dynamic = (char)(opcode == _DYNAMIC_EXIT); next_exit--; } } @@ -1561,20 +1555,15 @@ error: } void -_Py_JITTracer_InvalidateDependency(PyThreadState *old_tstate, void *obj) +_Py_JITTracer_InvalidateDependency(PyThreadState *tstate, void *obj) { _PyBloomFilter obj_filter; _Py_BloomFilter_Init(&obj_filter); _Py_BloomFilter_Add(&obj_filter, obj); - PyInterpreterState *interp = old_tstate->interp; - - _Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) { - if (bloom_filter_may_contain(&tstate->interp->jit_state.jit_tracer_dependencies, &obj_filter)) - { - tstate->interp->jit_state.jit_tracer_dependencies_still_valid = false; - } - + if (bloom_filter_may_contain(&tstate->interp->jit_state.jit_tracer_dependencies, &obj_filter)) + { + tstate->interp->jit_state.jit_tracer_dependencies_still_valid = false; } } /* Invalidate all executors */ diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 9f2a12efee67..2d3f1d95d5ab 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -528,10 +528,14 @@ _Py_uop_analyze_and_optimize( { OPT_STAT_INC(optimizer_attempts); - optimize_uops( + length = optimize_uops( initial_func, buffer, length, curr_stacklen, dependencies); + if (length == 0) { + return length; + } + assert(length > 0); length = remove_unneeded_uops(buffer, length); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index facdabef83e7..720bb5f0ec61 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -937,7 +937,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: no_save_ip=no_save_ip, tier=tier_variable(op), needs_prev=variable_used(op, "prev_instr"), - needs_guard_ip=variable_used(op, "JUMPBY") or variable_used(op, "LLTRACE_RESUME_FRAME"), + needs_guard_ip=variable_used(op, "TIER2_STORE_IP") or variable_used(op, "LLTRACE_RESUME_FRAME") or variable_used(op, "DISPATCH_INLINED"), ) def expand(items: list[StackItem], oparg: int) -> list[StackItem]: diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 8a95e4ea6a34..2d5f4607ab46 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -128,7 +128,7 @@ class Emitter: "DISPATCH": self.dispatch, "INSTRUCTION_SIZE": self.instruction_size, "stack_pointer": self.stack_pointer, - "RECORD_JUMP_TAKEN": self.record_jump_taken, + "RECORD_DYNAMIC_JUMP_TAKEN": self.record_dynamic_jump_taken, } self.out = out self.labels = labels @@ -476,7 +476,7 @@ class Emitter: self.out.emit(f" {uop.instruction_size}u ") return True - def record_jump_taken( + def record_dynamic_jump_taken( self, tkn: Token, tkn_iter: TokenIterator, diff --git a/Tools/cases_generator/tracer_generator.py b/Tools/cases_generator/tracer_generator.py index 912056b6212a..7227cca90f51 100644 --- a/Tools/cases_generator/tracer_generator.py +++ b/Tools/cases_generator/tracer_generator.py @@ -96,7 +96,7 @@ class TracerEmitter(Emitter): emit_to(self.out, tkn_iter, "SEMI") return False - def record_jump_taken( + def record_dynamic_jump_taken( self, tkn: Token, tkn_iter: TokenIterator, -- 2.47.3