From 7192671f52b13a8748354e900609e36b08942e2e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 20 Sep 2025 17:14:34 +0100 Subject: [PATCH] Fully working bm_generators --- Python/bytecodes.c | 3 +- Python/ceval.c | 7 ++-- Python/executor_cases.c.h | 3 +- Python/generated_cases.c.h | 4 +-- Python/optimizer.c | 44 ++++++++++++------------ Python/optimizer_analysis.c | 2 +- Tools/cases_generator/tier2_generator.py | 10 ------ 7 files changed, 33 insertions(+), 40 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e972e4732b48..a0983a474af8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3033,7 +3033,7 @@ dummy_func( /* If the eval breaker is set then stay in tier 1. * This avoids any potentially infinite loops * involving _RESUME_CHECK */ - if (IS_JIT_TRACING() || _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { opcode = executor->vm_data.opcode; oparg = (oparg & ~255) | executor->vm_data.oparg; next_instr = this_instr; @@ -3236,6 +3236,7 @@ dummy_func( } op(_FOR_ITER_TIER_TWO, (iter, null_or_index -- iter, null_or_index, next)) { + TIER2_JUMPBY(1 + INLINE_CACHE_ENTRIES_FOR_ITER); _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); if (!PyStackRef_IsValid(item)) { if (PyStackRef_IsError(item)) { diff --git a/Python/ceval.c b/Python/ceval.c index 940f27bd5661..3eed68821021 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1195,8 +1195,11 @@ tier2_dispatch: for (;;) { uopcode = next_uop->opcode; #ifdef Py_DEBUG - if (frame->lltrace >= 4 && next_uop->opcode != _YIELD_VALUE) { - // dump_stack(frame, stack_pointer); + if (frame->lltrace >= 4 && + next_uop->opcode != _YIELD_VALUE && + next_uop->opcode != _FOR_ITER_GEN_FRAME && + next_uop->opcode != _PUSH_FRAME) { + dump_stack(frame, stack_pointer); if (next_uop->opcode == 
_START_EXECUTOR) { printf("%4d uop: ", 0); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1000d7dedab0..f6c729f53350 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4196,7 +4196,6 @@ } case _JUMP_BACKWARD_NO_INTERRUPT: { - TIER2_JUMPBY(2); oparg = CURRENT_OPARG(); #if TIER_ONE assert(oparg <= INSTR_OFFSET()); @@ -4402,6 +4401,7 @@ oparg = CURRENT_OPARG(); null_or_index = stack_pointer[-1]; iter = stack_pointer[-2]; + TIER2_JUMPBY(1 + INLINE_CACHE_ENTRIES_FOR_ITER); _PyFrame_SetStackPointer(frame, stack_pointer); _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -4464,7 +4464,6 @@ } case _ITER_NEXT_LIST: { - TIER2_JUMPBY(2); _PyStackRef null_or_index; _PyStackRef iter; _PyStackRef next; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 007d97f8b115..f7b890621e69 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5640,7 +5640,7 @@ assert(executor->vm_data.code == code); assert(executor->vm_data.valid); assert(tstate->current_executor == NULL); - if (IS_JIT_TRACING() || _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { opcode = executor->vm_data.opcode; oparg = (oparg & ~255) | executor->vm_data.oparg; next_instr = this_instr; @@ -18323,7 +18323,7 @@ assert(executor->vm_data.code == code); assert(executor->vm_data.valid); assert(tstate->current_executor == NULL); - if (IS_JIT_TRACING() || _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { opcode = executor->vm_data.opcode; oparg = (oparg & ~255) | executor->vm_data.oparg; next_instr = this_instr; diff --git a/Python/optimizer.c b/Python/optimizer.c index 21a0fb71651e..4de08899456b 100644 --- 
a/Python/optimizer.c +++ b/Python/optimizer.c @@ -568,7 +568,7 @@ _PyJIT_translate_single_bytecode_to_trace( target = INSTR_IP(target_instr, old_code); // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT - max_length-=2; + max_length -= 2; if ((uint16_t)oparg != (uint64_t)oparg) { goto full; } @@ -583,17 +583,13 @@ _PyJIT_translate_single_bytecode_to_trace( return 1; } - // Unsupported opcodes - if (opcode == WITH_EXCEPT_START || opcode == RERAISE || opcode == CLEANUP_THROW) { + if (opcode == ENTER_EXECUTOR) { goto full; } - RESERVE_RAW(1, "_CHECK_VALIDITY"); - ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); - - if (!OPCODE_HAS_NO_SAVE_IP(opcode)) { - RESERVE_RAW(2, "_SET_IP"); - ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target); + // Unsupported opcodes + if (opcode == WITH_EXCEPT_START || opcode == RERAISE || opcode == CLEANUP_THROW) { + goto full; } bool needs_guard_ip = _PyOpcode_NeedsGuardIp[opcode] && @@ -601,8 +597,13 @@ _PyJIT_translate_single_bytecode_to_trace( !(opcode == JUMP_BACKWARD_NO_INTERRUPT || opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_JIT) && !(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_NONE || opcode == POP_JUMP_IF_NOT_NONE); - if (needs_guard_ip) { - RESERVE_RAW(1, "_GUARD_IP"); + const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; + RESERVE_RAW(expansion->nuops + needs_guard_ip + 3, "uop and various checks"); + + ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); + + if (!OPCODE_HAS_NO_SAVE_IP(opcode)) { + ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target); } /* Special case the first instruction, @@ -623,12 +624,10 @@ _PyJIT_translate_single_bytecode_to_trace( if (OPCODE_HAS_EXIT(opcode)) { // Make space for side exit and final _EXIT_TRACE: - RESERVE_RAW(2, "_EXIT_TRACE"); max_length--; } if (OPCODE_HAS_ERROR(opcode)) { // Make space for error stub and final _EXIT_TRACE: - RESERVE_RAW(2, "_ERROR_POP_N"); max_length--; } @@ -643,7 
+642,9 @@ _PyJIT_translate_single_bytecode_to_trace( int bitcount = counter & 1; int jump_likely = bitcount; uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely]; - ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(target_instr, old_code)); + _Py_CODEUNIT *next_instr = target_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; + _Py_CODEUNIT *false_target = next_instr + oparg; + ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(false_target, old_code)); break; } case JUMP_BACKWARD_JIT: @@ -704,13 +705,6 @@ _PyJIT_translate_single_bytecode_to_trace( if (uop == _TIER2_RESUME_CHECK) { target = next_inst; } -#ifdef Py_DEBUG - else { - uint32_t jump_target = next_inst + oparg; - assert(_Py_GetBaseCodeUnit(old_code, jump_target).op.code == END_FOR); - assert(_Py_GetBaseCodeUnit(old_code, jump_target+1).op.code == POP_ITER); - } -#endif break; case OPERAND1_1: assert(trace[trace_length-1].opcode == uop); @@ -772,6 +766,8 @@ full: if (!is_terminator(&tstate->interp->jit_tracer_code_buffer[trace_length-1])) { // Undo the last few instructions. trace_length = tstate->interp->jit_tracer_code_curr_size; + // We previously reserved one. + max_length += 1; ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); } tstate->interp->jit_tracer_code_curr_size = trace_length; @@ -1121,10 +1117,14 @@ uop_optimize( char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); bool is_noopt = true; if (env_var == NULL || *env_var == '\0' || *env_var > '0') { - is_noopt = true; + is_noopt = false; } int curr_stackentries = tstate->interp->jit_tracer_initial_stack_depth; int length = interp->jit_tracer_code_curr_size; + // Trace too short, don't bother. 
+ if (length <= 8) { + return 0; + } assert(length > 0); assert(length < UOP_MAX_TRACE_LENGTH); OPT_STAT_INC(traces_created); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 21410f1016ad..61a31adc6887 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -493,7 +493,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } /* _PUSH_FRAME doesn't escape or error, but it * does need the IP for the return address */ - bool needs_ip = opcode == _PUSH_FRAME; + bool needs_ip = (opcode == _PUSH_FRAME || opcode == _YIELD_VALUE); if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { needs_ip = true; may_have_escaped = true; diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 35a10e7bebed..28adb132874d 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -226,16 +226,6 @@ def generate_tier2( ) continue out.emit(f"case {uop.name}: {{\n") - if uop.properties.jumps: - containing_inst = None - for inst in analysis.instructions.values(): - if uop in inst.parts: - print(uop.name, inst.name) - containing_inst = inst - break - assert containing_inst is not None, uop.name - size = containing_inst.size - out.emit(f"TIER2_JUMPBY({size});\n") declare_variables(uop, out) stack = Stack() stack = write_uop(uop, emitter, stack) -- 2.47.3