git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Fully working bm_generators
author Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Sat, 20 Sep 2025 16:14:34 +0000 (17:14 +0100)
committer Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Sat, 20 Sep 2025 16:14:34 +0000 (17:14 +0100)
Python/bytecodes.c
Python/ceval.c
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/optimizer.c
Python/optimizer_analysis.c
Tools/cases_generator/tier2_generator.py

index e972e4732b48a202534abb1978c126759b521b26..a0983a474af801d0f05007fb9fc945ee707a1a54 100644 (file)
@@ -3033,7 +3033,7 @@ dummy_func(
             /* If the eval breaker is set then stay in tier 1.
              * This avoids any potentially infinite loops
              * involving _RESUME_CHECK */
-            if (IS_JIT_TRACING() || _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
+            if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
                 opcode = executor->vm_data.opcode;
                 oparg = (oparg & ~255) | executor->vm_data.oparg;
                 next_instr = this_instr;
@@ -3236,6 +3236,7 @@ dummy_func(
         }
 
         op(_FOR_ITER_TIER_TWO, (iter, null_or_index -- iter, null_or_index, next)) {
+            TIER2_JUMPBY(1 + INLINE_CACHE_ENTRIES_FOR_ITER);
             _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index);
             if (!PyStackRef_IsValid(item)) {
                 if (PyStackRef_IsError(item)) {
index 940f27bd56611df7cac9a8db8ba4ae7882e73c1c..3eed68821021f059eb2f79d67abec0715d04f389 100644 (file)
@@ -1195,8 +1195,11 @@ tier2_dispatch:
     for (;;) {
         uopcode = next_uop->opcode;
 #ifdef Py_DEBUG
-        if (frame->lltrace >= 4 && next_uop->opcode != _YIELD_VALUE) {
-            // dump_stack(frame, stack_pointer);
+        if (frame->lltrace >= 4 &&
+            next_uop->opcode != _YIELD_VALUE &&
+            next_uop->opcode != _FOR_ITER_GEN_FRAME &&
+            next_uop->opcode != _PUSH_FRAME) {
+            dump_stack(frame, stack_pointer);
             if (next_uop->opcode == _START_EXECUTOR) {
                 printf("%4d uop: ", 0);
             }
index 1000d7dedab06846c9ef045fa3df1f499539fecd..f6c729f533504c1e3496e814c4f017e05ad08120 100644 (file)
         }
 
         case _JUMP_BACKWARD_NO_INTERRUPT: {
-            TIER2_JUMPBY(2);
             oparg = CURRENT_OPARG();
             #if TIER_ONE
             assert(oparg <= INSTR_OFFSET());
             oparg = CURRENT_OPARG();
             null_or_index = stack_pointer[-1];
             iter = stack_pointer[-2];
+            TIER2_JUMPBY(1 + INLINE_CACHE_ENTRIES_FOR_ITER);
             _PyFrame_SetStackPointer(frame, stack_pointer);
             _PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index);
             stack_pointer = _PyFrame_GetStackPointer(frame);
         }
 
         case _ITER_NEXT_LIST: {
-            TIER2_JUMPBY(2);
             _PyStackRef null_or_index;
             _PyStackRef iter;
             _PyStackRef next;
index 007d97f8b1151708805650a52897217b8f8b56e0..f7b890621e6980040132e10c3f027a7d5e010fa0 100644 (file)
             assert(executor->vm_data.code == code);
             assert(executor->vm_data.valid);
             assert(tstate->current_executor == NULL);
-            if (IS_JIT_TRACING() || _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
+            if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
                 opcode = executor->vm_data.opcode;
                 oparg = (oparg & ~255) | executor->vm_data.oparg;
                 next_instr = this_instr;
             assert(executor->vm_data.code == code);
             assert(executor->vm_data.valid);
             assert(tstate->current_executor == NULL);
-            if (IS_JIT_TRACING() || _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
+            if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) {
                 opcode = executor->vm_data.opcode;
                 oparg = (oparg & ~255) | executor->vm_data.oparg;
                 next_instr = this_instr;
index 21a0fb71651e2db815b4d0544b893f37dc9e635d..4de08899456baec532e98fce4d57ebee9ebb7736 100644 (file)
@@ -568,7 +568,7 @@ _PyJIT_translate_single_bytecode_to_trace(
 
     target = INSTR_IP(target_instr, old_code);
     // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
-    max_length-=2;
+    max_length -= 2;
     if ((uint16_t)oparg != (uint64_t)oparg) {
         goto full;
     }
@@ -583,17 +583,13 @@ _PyJIT_translate_single_bytecode_to_trace(
         return 1;
     }
 
-    // Unsupported opcodes
-    if (opcode == WITH_EXCEPT_START || opcode == RERAISE || opcode == CLEANUP_THROW) {
+    if (opcode == ENTER_EXECUTOR) {
         goto full;
     }
 
-    RESERVE_RAW(1, "_CHECK_VALIDITY");
-    ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target);
-
-    if (!OPCODE_HAS_NO_SAVE_IP(opcode)) {
-        RESERVE_RAW(2, "_SET_IP");
-        ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target);
+    // Unsupported opcodes
+    if (opcode == WITH_EXCEPT_START || opcode == RERAISE || opcode == CLEANUP_THROW) {
+        goto full;
     }
 
     bool needs_guard_ip = _PyOpcode_NeedsGuardIp[opcode] &&
@@ -601,8 +597,13 @@ _PyJIT_translate_single_bytecode_to_trace(
         !(opcode == JUMP_BACKWARD_NO_INTERRUPT || opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_JIT) &&
         !(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE || opcode == POP_JUMP_IF_NONE || opcode == POP_JUMP_IF_NOT_NONE);
 
-    if (needs_guard_ip) {
-        RESERVE_RAW(1, "_GUARD_IP");
+    const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
+    RESERVE_RAW(expansion->nuops + needs_guard_ip + 3, "uop and various checks");
+
+    ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target);
+
+    if (!OPCODE_HAS_NO_SAVE_IP(opcode)) {
+        ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)target_instr, target);
     }
 
     /* Special case the first instruction,
@@ -623,12 +624,10 @@ _PyJIT_translate_single_bytecode_to_trace(
 
     if (OPCODE_HAS_EXIT(opcode)) {
         // Make space for side exit and final _EXIT_TRACE:
-        RESERVE_RAW(2, "_EXIT_TRACE");
         max_length--;
     }
     if (OPCODE_HAS_ERROR(opcode)) {
         // Make space for error stub and final _EXIT_TRACE:
-        RESERVE_RAW(2, "_ERROR_POP_N");
         max_length--;
     }
 
@@ -643,7 +642,9 @@ _PyJIT_translate_single_bytecode_to_trace(
             int bitcount = counter & 1;
             int jump_likely = bitcount;
             uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely];
-            ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(target_instr, old_code));
+            _Py_CODEUNIT *next_instr = target_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
+            _Py_CODEUNIT *false_target = next_instr + oparg;
+            ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(false_target, old_code));
             break;
         }
         case JUMP_BACKWARD_JIT:
@@ -704,13 +705,6 @@ _PyJIT_translate_single_bytecode_to_trace(
                             if (uop == _TIER2_RESUME_CHECK) {
                                 target = next_inst;
                             }
-#ifdef Py_DEBUG
-                            else {
-                                uint32_t jump_target = next_inst + oparg;
-                                assert(_Py_GetBaseCodeUnit(old_code, jump_target).op.code == END_FOR);
-                                assert(_Py_GetBaseCodeUnit(old_code, jump_target+1).op.code == POP_ITER);
-                            }
-#endif
                             break;
                         case OPERAND1_1:
                             assert(trace[trace_length-1].opcode == uop);
@@ -772,6 +766,8 @@ full:
     if (!is_terminator(&tstate->interp->jit_tracer_code_buffer[trace_length-1])) {
         // Undo the last few instructions.
         trace_length = tstate->interp->jit_tracer_code_curr_size;
+        // We previously reversed one.
+        max_length += 1;
         ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
     }
     tstate->interp->jit_tracer_code_curr_size = trace_length;
@@ -1121,10 +1117,14 @@ uop_optimize(
     char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
     bool is_noopt = true;
     if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
-        is_noopt = true;
+        is_noopt = false;
     }
     int curr_stackentries = tstate->interp->jit_tracer_initial_stack_depth;
     int length = interp->jit_tracer_code_curr_size;
+    // Trace too short, don't bother.
+    if (length <= 8) {
+        return 0;
+    }
     assert(length > 0);
     assert(length < UOP_MAX_TRACE_LENGTH);
     OPT_STAT_INC(traces_created);
index 21410f1016ad5d9cf215eaa5f83ba799bbf31c22..61a31adc6887dc99fd0ec001bf2b444eb6f682c0 100644 (file)
@@ -493,7 +493,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
                 }
                 /* _PUSH_FRAME doesn't escape or error, but it
                  * does need the IP for the return address */
-                bool needs_ip = opcode == _PUSH_FRAME;
+                bool needs_ip = (opcode == _PUSH_FRAME || opcode == _YIELD_VALUE);
                 if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) {
                     needs_ip = true;
                     may_have_escaped = true;
index 35a10e7bebed1e1effaa32180c1af514d72e7c9a..28adb132874d7a05166887653c87cb81877a7b9f 100644 (file)
@@ -226,16 +226,6 @@ def generate_tier2(
             )
             continue
         out.emit(f"case {uop.name}: {{\n")
-        if uop.properties.jumps:
-            containing_inst = None
-            for inst in analysis.instructions.values():
-                if uop in inst.parts:
-                    print(uop.name, inst.name)
-                    containing_inst = inst
-                    break
-            assert containing_inst is not None, uop.name
-            size = containing_inst.size
-            out.emit(f"TIER2_JUMPBY({size});\n")
         declare_variables(uop, out)
         stack = Stack()
         stack = write_uop(uop, emitter, stack)