git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-118095: Add dynamic exit support and FOR_ITER_GEN support to tier 2 (GH-118279)
author: Mark Shannon <mark@hotpy.org>
Fri, 26 Apr 2024 17:08:50 +0000 (18:08 +0100)
committer: GitHub <noreply@github.com>
Fri, 26 Apr 2024 17:08:50 +0000 (18:08 +0100)
12 files changed:
Include/internal/pycore_opcode_metadata.h
Include/internal/pycore_uop_ids.h
Include/internal/pycore_uop_metadata.h
Lib/test/test_capi/test_opt.py
Python/bytecodes.c
Python/ceval.c
Python/ceval_macros.h
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/optimizer.c
Python/optimizer_cases.c.h
Tools/jit/template.c

index 400d7c334db8e73125e94e3568e678ca55f18afa..4b1f43cf2af06e7a42c6d802d0676fed95bf1232 100644 (file)
@@ -625,7 +625,7 @@ int _PyOpcode_num_pushed(int opcode, int oparg)  {
         case FOR_ITER:
             return 2;
         case FOR_ITER_GEN:
-            return 2;
+            return 1;
         case FOR_ITER_LIST:
             return 2;
         case FOR_ITER_RANGE:
@@ -1253,6 +1253,7 @@ _PyOpcode_macro_expansion[256] = {
     [FORMAT_SIMPLE] = { .nuops = 1, .uops = { { _FORMAT_SIMPLE, 0, 0 } } },
     [FORMAT_WITH_SPEC] = { .nuops = 1, .uops = { { _FORMAT_WITH_SPEC, 0, 0 } } },
     [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, 9, 0 } } },
+    [FOR_ITER_GEN] = { .nuops = 3, .uops = { { _CHECK_PEP_523, 0, 0 }, { _FOR_ITER_GEN_FRAME, 0, 0 }, { _PUSH_FRAME, 0, 0 } } },
     [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 9, 1 }, { _ITER_NEXT_LIST, 0, 0 } } },
     [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 9, 1 }, { _ITER_NEXT_RANGE, 0, 0 } } },
     [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 9, 1 }, { _ITER_NEXT_TUPLE, 0, 0 } } },
index bb49d6e77d25622c5e1cce3315937d4007d3e0b1..beb182c436d52abc2520ce0cdfc7ad5b145c1933 100644 (file)
@@ -91,48 +91,49 @@ extern "C" {
 #define _DEOPT 342
 #define _DICT_MERGE DICT_MERGE
 #define _DICT_UPDATE DICT_UPDATE
+#define _DYNAMIC_EXIT 343
 #define _END_SEND END_SEND
-#define _ERROR_POP_N 343
+#define _ERROR_POP_N 344
 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK
-#define _FATAL_ERROR 344
+#define _FATAL_ERROR 345
 #define _FORMAT_SIMPLE FORMAT_SIMPLE
 #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC
-#define _FOR_ITER 345
-#define _FOR_ITER_GEN FOR_ITER_GEN
-#define _FOR_ITER_TIER_TWO 346
+#define _FOR_ITER 346
+#define _FOR_ITER_GEN_FRAME 347
+#define _FOR_ITER_TIER_TWO 348
 #define _GET_AITER GET_AITER
 #define _GET_ANEXT GET_ANEXT
 #define _GET_AWAITABLE GET_AWAITABLE
 #define _GET_ITER GET_ITER
 #define _GET_LEN GET_LEN
 #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER
-#define _GUARD_BOTH_FLOAT 347
-#define _GUARD_BOTH_INT 348
-#define _GUARD_BOTH_UNICODE 349
-#define _GUARD_BUILTINS_VERSION 350
-#define _GUARD_DORV_NO_DICT 351
-#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 352
-#define _GUARD_GLOBALS_VERSION 353
-#define _GUARD_IS_FALSE_POP 354
-#define _GUARD_IS_NONE_POP 355
-#define _GUARD_IS_NOT_NONE_POP 356
-#define _GUARD_IS_TRUE_POP 357
-#define _GUARD_KEYS_VERSION 358
-#define _GUARD_NOS_FLOAT 359
-#define _GUARD_NOS_INT 360
-#define _GUARD_NOT_EXHAUSTED_LIST 361
-#define _GUARD_NOT_EXHAUSTED_RANGE 362
-#define _GUARD_NOT_EXHAUSTED_TUPLE 363
-#define _GUARD_TOS_FLOAT 364
-#define _GUARD_TOS_INT 365
-#define _GUARD_TYPE_VERSION 366
-#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 367
-#define _INIT_CALL_PY_EXACT_ARGS 368
-#define _INIT_CALL_PY_EXACT_ARGS_0 369
-#define _INIT_CALL_PY_EXACT_ARGS_1 370
-#define _INIT_CALL_PY_EXACT_ARGS_2 371
-#define _INIT_CALL_PY_EXACT_ARGS_3 372
-#define _INIT_CALL_PY_EXACT_ARGS_4 373
+#define _GUARD_BOTH_FLOAT 349
+#define _GUARD_BOTH_INT 350
+#define _GUARD_BOTH_UNICODE 351
+#define _GUARD_BUILTINS_VERSION 352
+#define _GUARD_DORV_NO_DICT 353
+#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 354
+#define _GUARD_GLOBALS_VERSION 355
+#define _GUARD_IS_FALSE_POP 356
+#define _GUARD_IS_NONE_POP 357
+#define _GUARD_IS_NOT_NONE_POP 358
+#define _GUARD_IS_TRUE_POP 359
+#define _GUARD_KEYS_VERSION 360
+#define _GUARD_NOS_FLOAT 361
+#define _GUARD_NOS_INT 362
+#define _GUARD_NOT_EXHAUSTED_LIST 363
+#define _GUARD_NOT_EXHAUSTED_RANGE 364
+#define _GUARD_NOT_EXHAUSTED_TUPLE 365
+#define _GUARD_TOS_FLOAT 366
+#define _GUARD_TOS_INT 367
+#define _GUARD_TYPE_VERSION 368
+#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 369
+#define _INIT_CALL_PY_EXACT_ARGS 370
+#define _INIT_CALL_PY_EXACT_ARGS_0 371
+#define _INIT_CALL_PY_EXACT_ARGS_1 372
+#define _INIT_CALL_PY_EXACT_ARGS_2 373
+#define _INIT_CALL_PY_EXACT_ARGS_3 374
+#define _INIT_CALL_PY_EXACT_ARGS_4 375
 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL
 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX
 #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW
@@ -149,65 +150,65 @@ extern "C" {
 #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST
 #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE
 #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE
-#define _INTERNAL_INCREMENT_OPT_COUNTER 374
-#define _IS_NONE 375
+#define _INTERNAL_INCREMENT_OPT_COUNTER 376
+#define _IS_NONE 377
 #define _IS_OP IS_OP
-#define _ITER_CHECK_LIST 376
-#define _ITER_CHECK_RANGE 377
-#define _ITER_CHECK_TUPLE 378
-#define _ITER_JUMP_LIST 379
-#define _ITER_JUMP_RANGE 380
-#define _ITER_JUMP_TUPLE 381
-#define _ITER_NEXT_LIST 382
-#define _ITER_NEXT_RANGE 383
-#define _ITER_NEXT_TUPLE 384
-#define _JUMP_TO_TOP 385
+#define _ITER_CHECK_LIST 378
+#define _ITER_CHECK_RANGE 379
+#define _ITER_CHECK_TUPLE 380
+#define _ITER_JUMP_LIST 381
+#define _ITER_JUMP_RANGE 382
+#define _ITER_JUMP_TUPLE 383
+#define _ITER_NEXT_LIST 384
+#define _ITER_NEXT_RANGE 385
+#define _ITER_NEXT_TUPLE 386
+#define _JUMP_TO_TOP 387
 #define _LIST_APPEND LIST_APPEND
 #define _LIST_EXTEND LIST_EXTEND
 #define _LOAD_ASSERTION_ERROR LOAD_ASSERTION_ERROR
-#define _LOAD_ATTR 386
-#define _LOAD_ATTR_CLASS 387
-#define _LOAD_ATTR_CLASS_0 388
-#define _LOAD_ATTR_CLASS_1 389
+#define _LOAD_ATTR 388
+#define _LOAD_ATTR_CLASS 389
+#define _LOAD_ATTR_CLASS_0 390
+#define _LOAD_ATTR_CLASS_1 391
 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
-#define _LOAD_ATTR_INSTANCE_VALUE 390
-#define _LOAD_ATTR_INSTANCE_VALUE_0 391
-#define _LOAD_ATTR_INSTANCE_VALUE_1 392
-#define _LOAD_ATTR_METHOD_LAZY_DICT 393
-#define _LOAD_ATTR_METHOD_NO_DICT 394
-#define _LOAD_ATTR_METHOD_WITH_VALUES 395
-#define _LOAD_ATTR_MODULE 396
-#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 397
-#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 398
+#define _LOAD_ATTR_INSTANCE_VALUE 392
+#define _LOAD_ATTR_INSTANCE_VALUE_0 393
+#define _LOAD_ATTR_INSTANCE_VALUE_1 394
+#define _LOAD_ATTR_METHOD_LAZY_DICT 395
+#define _LOAD_ATTR_METHOD_NO_DICT 396
+#define _LOAD_ATTR_METHOD_WITH_VALUES 397
+#define _LOAD_ATTR_MODULE 398
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 399
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 400
 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY
-#define _LOAD_ATTR_SLOT 399
-#define _LOAD_ATTR_SLOT_0 400
-#define _LOAD_ATTR_SLOT_1 401
-#define _LOAD_ATTR_WITH_HINT 402
+#define _LOAD_ATTR_SLOT 401
+#define _LOAD_ATTR_SLOT_0 402
+#define _LOAD_ATTR_SLOT_1 403
+#define _LOAD_ATTR_WITH_HINT 404
 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
 #define _LOAD_CONST LOAD_CONST
-#define _LOAD_CONST_INLINE 403
-#define _LOAD_CONST_INLINE_BORROW 404
-#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 405
-#define _LOAD_CONST_INLINE_WITH_NULL 406
+#define _LOAD_CONST_INLINE 405
+#define _LOAD_CONST_INLINE_BORROW 406
+#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 407
+#define _LOAD_CONST_INLINE_WITH_NULL 408
 #define _LOAD_DEREF LOAD_DEREF
-#define _LOAD_FAST 407
-#define _LOAD_FAST_0 408
-#define _LOAD_FAST_1 409
-#define _LOAD_FAST_2 410
-#define _LOAD_FAST_3 411
-#define _LOAD_FAST_4 412
-#define _LOAD_FAST_5 413
-#define _LOAD_FAST_6 414
-#define _LOAD_FAST_7 415
+#define _LOAD_FAST 409
+#define _LOAD_FAST_0 410
+#define _LOAD_FAST_1 411
+#define _LOAD_FAST_2 412
+#define _LOAD_FAST_3 413
+#define _LOAD_FAST_4 414
+#define _LOAD_FAST_5 415
+#define _LOAD_FAST_6 416
+#define _LOAD_FAST_7 417
 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
 #define _LOAD_FAST_CHECK LOAD_FAST_CHECK
 #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
 #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 416
-#define _LOAD_GLOBAL_BUILTINS 417
-#define _LOAD_GLOBAL_MODULE 418
+#define _LOAD_GLOBAL 418
+#define _LOAD_GLOBAL_BUILTINS 419
+#define _LOAD_GLOBAL_MODULE 420
 #define _LOAD_LOCALS LOAD_LOCALS
 #define _LOAD_NAME LOAD_NAME
 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
@@ -221,50 +222,50 @@ extern "C" {
 #define _MATCH_SEQUENCE MATCH_SEQUENCE
 #define _NOP NOP
 #define _POP_EXCEPT POP_EXCEPT
-#define _POP_FRAME 419
-#define _POP_JUMP_IF_FALSE 420
-#define _POP_JUMP_IF_TRUE 421
+#define _POP_FRAME 421
+#define _POP_JUMP_IF_FALSE 422
+#define _POP_JUMP_IF_TRUE 423
 #define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 422
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 424
 #define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 423
+#define _PUSH_FRAME 425
 #define _PUSH_NULL PUSH_NULL
-#define _REPLACE_WITH_TRUE 424
+#define _REPLACE_WITH_TRUE 426
 #define _RESUME_CHECK RESUME_CHECK
 #define _RETURN_GENERATOR RETURN_GENERATOR
-#define _SAVE_RETURN_OFFSET 425
-#define _SEND 426
+#define _SAVE_RETURN_OFFSET 427
+#define _SEND 428
 #define _SEND_GEN SEND_GEN
 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
 #define _SET_ADD SET_ADD
 #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
 #define _SET_UPDATE SET_UPDATE
-#define _SIDE_EXIT 427
-#define _START_EXECUTOR 428
-#define _STORE_ATTR 429
-#define _STORE_ATTR_INSTANCE_VALUE 430
-#define _STORE_ATTR_SLOT 431
+#define _SIDE_EXIT 429
+#define _START_EXECUTOR 430
+#define _STORE_ATTR 431
+#define _STORE_ATTR_INSTANCE_VALUE 432
+#define _STORE_ATTR_SLOT 433
 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT
 #define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 432
-#define _STORE_FAST_0 433
-#define _STORE_FAST_1 434
-#define _STORE_FAST_2 435
-#define _STORE_FAST_3 436
-#define _STORE_FAST_4 437
-#define _STORE_FAST_5 438
-#define _STORE_FAST_6 439
-#define _STORE_FAST_7 440
+#define _STORE_FAST 434
+#define _STORE_FAST_0 435
+#define _STORE_FAST_1 436
+#define _STORE_FAST_2 437
+#define _STORE_FAST_3 438
+#define _STORE_FAST_4 439
+#define _STORE_FAST_5 440
+#define _STORE_FAST_6 441
+#define _STORE_FAST_7 442
 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
 #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
 #define _STORE_GLOBAL STORE_GLOBAL
 #define _STORE_NAME STORE_NAME
 #define _STORE_SLICE STORE_SLICE
-#define _STORE_SUBSCR 441
+#define _STORE_SUBSCR 443
 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
 #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
 #define _SWAP SWAP
-#define _TO_BOOL 442
+#define _TO_BOOL 444
 #define _TO_BOOL_BOOL TO_BOOL_BOOL
 #define _TO_BOOL_INT TO_BOOL_INT
 #define _TO_BOOL_LIST TO_BOOL_LIST
@@ -274,12 +275,12 @@ extern "C" {
 #define _UNARY_NEGATIVE UNARY_NEGATIVE
 #define _UNARY_NOT UNARY_NOT
 #define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 443
+#define _UNPACK_SEQUENCE 445
 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
 #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
 #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
 #define _WITH_EXCEPT_START WITH_EXCEPT_START
-#define MAX_UOP_ID 443
+#define MAX_UOP_ID 445
 
 #ifdef __cplusplus
 }
index b8cdfae83914603793bd5efd7aabc9a356a4b8b1..776728d04bce009d855503bfff23bf50937902c7 100644 (file)
@@ -180,6 +180,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_ITER_CHECK_RANGE] = HAS_EXIT_FLAG,
     [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_EXIT_FLAG,
     [_ITER_NEXT_RANGE] = HAS_ERROR_FLAG,
+    [_FOR_ITER_GEN_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
     [_WITH_EXCEPT_START] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_PUSH_EXC_INFO] = 0,
     [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG,
@@ -245,6 +246,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_CHECK_FUNCTION] = HAS_DEOPT_FLAG,
     [_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
     [_COLD_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG,
+    [_DYNAMIC_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG,
     [_START_EXECUTOR] = HAS_DEOPT_FLAG,
     [_FATAL_ERROR] = HAS_ESCAPES_FLAG,
     [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG,
@@ -331,6 +333,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_DEOPT] = "_DEOPT",
     [_DICT_MERGE] = "_DICT_MERGE",
     [_DICT_UPDATE] = "_DICT_UPDATE",
+    [_DYNAMIC_EXIT] = "_DYNAMIC_EXIT",
     [_END_SEND] = "_END_SEND",
     [_ERROR_POP_N] = "_ERROR_POP_N",
     [_EXIT_INIT_CHECK] = "_EXIT_INIT_CHECK",
@@ -338,6 +341,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_FATAL_ERROR] = "_FATAL_ERROR",
     [_FORMAT_SIMPLE] = "_FORMAT_SIMPLE",
     [_FORMAT_WITH_SPEC] = "_FORMAT_WITH_SPEC",
+    [_FOR_ITER_GEN_FRAME] = "_FOR_ITER_GEN_FRAME",
     [_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO",
     [_GET_AITER] = "_GET_AITER",
     [_GET_ANEXT] = "_GET_ANEXT",
@@ -818,6 +822,8 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 1;
         case _ITER_NEXT_RANGE:
             return 1;
+        case _FOR_ITER_GEN_FRAME:
+            return 1;
         case _WITH_EXCEPT_START:
             return 4;
         case _PUSH_EXC_INFO:
@@ -948,6 +954,8 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 1;
         case _COLD_EXIT:
             return 0;
+        case _DYNAMIC_EXIT:
+            return 0;
         case _START_EXECUTOR:
             return 0;
         case _FATAL_ERROR:
index e2e772a52d764e568ccd1a725fdf47aa78ea4ae9..c798b343626677778feecca67b1b4506db43892c 100644 (file)
@@ -132,7 +132,7 @@ def iter_opnames(ex):
 
 
 def get_opnames(ex):
-    return set(iter_opnames(ex))
+    return list(iter_opnames(ex))
 
 
 @requires_specialization
@@ -1298,5 +1298,20 @@ class TestUopsOptimization(unittest.TestCase):
         self.assertIsNotNone(ex)
         self.assertIn("_RETURN_GENERATOR", get_opnames(ex))
 
+    def test_for_iter_gen(self):
+        def gen(n):
+            for i in range(n):
+                yield i
+        def testfunc(n):
+            g = gen(n)
+            s = 0
+            for i in g:
+                s += i
+            return s
+        res, ex = self._run_with_optimizer(testfunc, 20)
+        self.assertEqual(res, 190)
+        self.assertIsNotNone(ex)
+        self.assertIn("_FOR_ITER_GEN_FRAME", get_opnames(ex))
+
 if __name__ == "__main__":
     unittest.main()
index 485504914912f91712da9cb13d744f8bc8d82fc1..fe3d61362e6b02bde72b207a901e2b985ee37543 100644 (file)
@@ -1109,6 +1109,10 @@ dummy_func(
             _PyFrame_StackPush(frame, retval);
             /* We don't know which of these is relevant here, so keep them equal */
             assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER);
+            assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND ||
+                   _PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER ||
+                   _PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT ||
+                   _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
             LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
             goto resume_frame;
         }
@@ -2759,24 +2763,26 @@ dummy_func(
             _ITER_JUMP_RANGE +
             _ITER_NEXT_RANGE;
 
-        inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) {
-            DEOPT_IF(tstate->interp->eval_frame);
+        op(_FOR_ITER_GEN_FRAME, (iter -- iter, gen_frame: _PyInterpreterFrame*)) {
             PyGenObject *gen = (PyGenObject *)iter;
             DEOPT_IF(Py_TYPE(gen) != &PyGen_Type);
             DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING);
             STAT_INC(FOR_ITER, hit);
-            _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
+            gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
             _PyFrame_StackPush(gen_frame, Py_None);
             gen->gi_frame_state = FRAME_EXECUTING;
             gen->gi_exc_state.previous_item = tstate->exc_info;
             tstate->exc_info = &gen->gi_exc_state;
-            assert(next_instr[oparg].op.code == END_FOR ||
-                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
-            assert(next_instr - this_instr + oparg <= UINT16_MAX);
-            frame->return_offset = (uint16_t)(next_instr - this_instr + oparg);
-            DISPATCH_INLINED(gen_frame);
+            // oparg is the return offset from the next instruction.
+            frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
         }
 
+        macro(FOR_ITER_GEN) =
+            unused/1 +
+            _CHECK_PEP_523 +
+            _FOR_ITER_GEN_FRAME +
+            _PUSH_FRAME;
+
         inst(BEFORE_ASYNC_WITH, (mgr -- exit, res)) {
             PyObject *enter = _PyObject_LookupSpecial(mgr, &_Py_ID(__aenter__));
             if (enter == NULL) {
@@ -3166,10 +3172,7 @@ dummy_func(
             }
         }
 
-        // The 'unused' output effect represents the return value
-        // (which will be pushed when the frame returns).
-        // It is needed so CALL_PY_EXACT_ARGS matches its family.
-        op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused if (0))) {
+        op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- )) {
             // Write it out explicitly because it's subtly different.
             // Eventually this should be the only occurrence of this code.
             assert(tstate->interp->eval_frame == NULL);
@@ -4189,6 +4192,38 @@ dummy_func(
             GOTO_TIER_TWO(executor);
         }
 
+        tier2 op(_DYNAMIC_EXIT, (--)) {
+            tstate->previous_executor = (PyObject *)current_executor;
+            _PyExitData *exit = (_PyExitData *)&current_executor->exits[oparg];
+            _Py_CODEUNIT *target = frame->instr_ptr;
+            _PyExecutorObject *executor;
+            if (target->op.code == ENTER_EXECUTOR) {
+                PyCodeObject *code = (PyCodeObject *)frame->f_executable;
+                executor = code->co_executors->executors[target->op.arg];
+                Py_INCREF(executor);
+            }
+            else {
+                if (!backoff_counter_triggers(exit->temperature)) {
+                    exit->temperature = advance_backoff_counter(exit->temperature);
+                    GOTO_TIER_ONE(target);
+                }
+                int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
+                if (optimized <= 0) {
+                    exit->temperature = restart_backoff_counter(exit->temperature);
+                    if (optimized < 0) {
+                        Py_DECREF(current_executor);
+                        tstate->previous_executor = Py_None;
+                        GOTO_UNWIND();
+                    }
+                    GOTO_TIER_ONE(target);
+                }
+                else {
+                    exit->temperature = initial_temperature_backoff_counter();
+                }
+            }
+            GOTO_TIER_TWO(executor);
+        }
+
         tier2 op(_START_EXECUTOR, (executor/4 --)) {
             Py_DECREF(tstate->previous_executor);
             tstate->previous_executor = NULL;
@@ -4222,6 +4257,7 @@ dummy_func(
             GOTO_UNWIND();
         }
 
+
 // END BYTECODES //
 
     }
index 2f217c5f33c6cef1aa182bebe9b277936a8aaff7..d130c734a67144e71d1a2eff38aa57ac58ba7950 100644 (file)
@@ -1072,9 +1072,13 @@ jump_to_jump_target:
     next_uop = current_executor->trace + target;
     goto tier2_dispatch;
 
+exit_to_tier1_dynamic:
+    next_instr = frame->instr_ptr;
+    goto goto_to_tier1;
 exit_to_tier1:
     assert(next_uop[-1].format == UOP_FORMAT_TARGET);
     next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
+goto_to_tier1:
 #ifdef Py_DEBUG
     if (lltrace >= 2) {
         printf("DEOPT: [UOp ");
index 871d1747e2bb8d217b8ee9f89012fe850c23b325..1a8554ab72269fe85d8d82a14eed5c0e70940c71 100644 (file)
@@ -442,3 +442,4 @@ do { \
 #define GOTO_UNWIND() goto error_tier_two
 #define EXIT_TO_TRACE() goto exit_to_trace
 #define EXIT_TO_TIER1() goto exit_to_tier1
+#define EXIT_TO_TIER1_DYNAMIC() goto exit_to_tier1_dynamic
index 1eb3da9b70002c4245af07a6aed08cbb61e4aa4f..280cca1592ae1892917ba5963362a1c017452053 100644 (file)
             break;
         }
 
-        /* _FOR_ITER_GEN is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */
+        case _FOR_ITER_GEN_FRAME: {
+            PyObject *iter;
+            _PyInterpreterFrame *gen_frame;
+            oparg = CURRENT_OPARG();
+            iter = stack_pointer[-1];
+            PyGenObject *gen = (PyGenObject *)iter;
+            if (Py_TYPE(gen) != &PyGen_Type) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            if (gen->gi_frame_state >= FRAME_EXECUTING) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            STAT_INC(FOR_ITER, hit);
+            gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
+            _PyFrame_StackPush(gen_frame, Py_None);
+            gen->gi_frame_state = FRAME_EXECUTING;
+            gen->gi_exc_state.previous_item = tstate->exc_info;
+            tstate->exc_info = &gen->gi_exc_state;
+            // oparg is the return offset from the next instruction.
+            frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
+            stack_pointer[0] = (PyObject *)gen_frame;
+            stack_pointer += 1;
+            break;
+        }
 
         /* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 because it has both popping and not-popping errors */
 
             break;
         }
 
+        case _DYNAMIC_EXIT: {
+            oparg = CURRENT_OPARG();
+            tstate->previous_executor = (PyObject *)current_executor;
+            _PyExitData *exit = (_PyExitData *)&current_executor->exits[oparg];
+            _Py_CODEUNIT *target = frame->instr_ptr;
+            _PyExecutorObject *executor;
+            if (target->op.code == ENTER_EXECUTOR) {
+                PyCodeObject *code = (PyCodeObject *)frame->f_executable;
+                executor = code->co_executors->executors[target->op.arg];
+                Py_INCREF(executor);
+            }
+            else {
+                if (!backoff_counter_triggers(exit->temperature)) {
+                    exit->temperature = advance_backoff_counter(exit->temperature);
+                    GOTO_TIER_ONE(target);
+                }
+                int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
+                if (optimized <= 0) {
+                    exit->temperature = restart_backoff_counter(exit->temperature);
+                    if (optimized < 0) {
+                        Py_DECREF(current_executor);
+                        tstate->previous_executor = Py_None;
+                        GOTO_UNWIND();
+                    }
+                    GOTO_TIER_ONE(target);
+                }
+                else {
+                    exit->temperature = initial_temperature_backoff_counter();
+                }
+            }
+            GOTO_TIER_TWO(executor);
+            break;
+        }
+
         case _START_EXECUTOR: {
             PyObject *executor = (PyObject *)CURRENT_OPERAND();
             Py_DECREF(tstate->previous_executor);
index 0c58f3f87d40412efb01a98eadd52283ac7fc5d8..c27505fde3d9fa68cd456c546f3c352adf73a66c 100644 (file)
         }
 
         TARGET(FOR_ITER_GEN) {
-            _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
+            frame->instr_ptr = next_instr;
             next_instr += 2;
             INSTRUCTION_STATS(FOR_ITER_GEN);
             static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size");
             PyObject *iter;
+            _PyInterpreterFrame *gen_frame;
+            _PyInterpreterFrame *new_frame;
             /* Skip 1 cache entry */
+            // _CHECK_PEP_523
+            {
+                DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
+            }
+            // _FOR_ITER_GEN_FRAME
             iter = stack_pointer[-1];
-            DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
-            PyGenObject *gen = (PyGenObject *)iter;
-            DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER);
-            DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
-            STAT_INC(FOR_ITER, hit);
-            _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
-            _PyFrame_StackPush(gen_frame, Py_None);
-            gen->gi_frame_state = FRAME_EXECUTING;
-            gen->gi_exc_state.previous_item = tstate->exc_info;
-            tstate->exc_info = &gen->gi_exc_state;
-            assert(next_instr[oparg].op.code == END_FOR ||
-                   next_instr[oparg].op.code == INSTRUMENTED_END_FOR);
-            assert(next_instr - this_instr + oparg <= UINT16_MAX);
-            frame->return_offset = (uint16_t)(next_instr - this_instr + oparg);
-            DISPATCH_INLINED(gen_frame);
+            {
+                PyGenObject *gen = (PyGenObject *)iter;
+                DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER);
+                DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER);
+                STAT_INC(FOR_ITER, hit);
+                gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
+                _PyFrame_StackPush(gen_frame, Py_None);
+                gen->gi_frame_state = FRAME_EXECUTING;
+                gen->gi_exc_state.previous_item = tstate->exc_info;
+                tstate->exc_info = &gen->gi_exc_state;
+                // oparg is the return offset from the next instruction.
+                frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg);
+            }
+            // _PUSH_FRAME
+            new_frame = gen_frame;
+            {
+                // Write it out explicitly because it's subtly different.
+                // Eventually this should be the only occurrence of this code.
+                assert(tstate->interp->eval_frame == NULL);
+                _PyFrame_SetStackPointer(frame, stack_pointer);
+                new_frame->previous = frame;
+                CALL_STAT_INC(inlined_py_calls);
+                frame = tstate->current_frame = new_frame;
+                tstate->py_recursion_remaining--;
+                LOAD_SP();
+                LOAD_IP(0);
+                LLTRACE_RESUME_FRAME();
+            }
+            DISPATCH();
         }
 
         TARGET(FOR_ITER_LIST) {
             _PyFrame_StackPush(frame, retval);
             /* We don't know which of these is relevant here, so keep them equal */
             assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER);
+            assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND ||
+                   _PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER ||
+                   _PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT ||
+                   _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR);
             LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND);
             goto resume_frame;
         }
index e5c70f72f9c324786539fc71ea1e85cead2b18c4..02c9b3950277910990b46d41360686ab1df92318 100644 (file)
@@ -567,8 +567,6 @@ translate_bytecode_to_trace(
 top:  // Jump here after _PUSH_FRAME or likely branches
     for (;;) {
         target = INSTR_IP(instr, code);
-        RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
-        ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
         // Need space for _DEOPT
         max_length--;
 
@@ -597,6 +595,8 @@ top:  // Jump here after _PUSH_FRAME or likely branches
             }
         }
         assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
+        RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
+        ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
 
         /* Special case the first instruction,
          * so that we can guarantee forward progress */
@@ -814,6 +814,12 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                                     ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
                                     goto done;
                                 }
+                                if (opcode == FOR_ITER_GEN) {
+                                    DPRINTF(2, "Bailing due to dynamic target\n");
+                                    ADD_TO_TRACE(uop, oparg, 0, target);
+                                    ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
+                                    goto done;
+                                }
                                 // Increment IP to the return address
                                 instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
                                 TRACE_STACK_PUSH();
@@ -847,7 +853,7 @@ top:  // Jump here after _PUSH_FRAME or likely branches
                             }
                             DPRINTF(2, "Bail, new_code == NULL\n");
                             ADD_TO_TRACE(uop, oparg, 0, target);
-                            ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
+                            ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
                             goto done;
                         }
 
@@ -917,7 +923,7 @@ count_exits(_PyUOpInstruction *buffer, int length)
     int exit_count = 0;
     for (int i = 0; i < length; i++) {
         int opcode = buffer[i].opcode;
-        if (opcode == _SIDE_EXIT) {
+        if (opcode == _SIDE_EXIT || opcode == _DYNAMIC_EXIT) {
             exit_count++;
         }
     }
@@ -1114,6 +1120,11 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
             dest->format = UOP_FORMAT_EXIT;
             next_exit--;
         }
+        if (opcode == _DYNAMIC_EXIT) {
+            executor->exits[next_exit].target = 0;
+            dest->oparg = next_exit;
+            next_exit--;
+        }
     }
     assert(next_exit == -1);
     assert(dest == executor->trace);
index 4f0941a3cc3e095f0e509af3541b2bc0f7112558..b196568770105072274eba4a07d2ab2b35c00f68 100644 (file)
             break;
         }
 
-        /* _FOR_ITER_GEN is not a viable micro-op for tier 2 */
+        case _FOR_ITER_GEN_FRAME: {
+            _PyInterpreterFrame *gen_frame;
+            gen_frame = sym_new_not_null(ctx);
+            if (gen_frame == NULL) goto out_of_space;
+            stack_pointer[0] = (_Py_UopsSymbol *)gen_frame;
+            stack_pointer += 1;
+            break;
+        }
 
         /* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 */
 
             break;
         }
 
+        case _DYNAMIC_EXIT: {
+            break;
+        }
+
         case _START_EXECUTOR: {
             break;
         }
index 228dc83254d678cf77aa0813997c1f1c6c0fc5ed..3e81fd15bb80935c68374898a03aee0a9fe96eb5 100644 (file)
@@ -87,6 +87,7 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *
     PATCH_VALUE(_PyExecutorObject *, current_executor, _JIT_EXECUTOR)
     int oparg;
     int uopcode = _JIT_OPCODE;
+    _Py_CODEUNIT *next_instr;
     // Other stuff we need handy:
     PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
 #if SIZEOF_VOID_P == 8
@@ -122,6 +123,9 @@ error_tier_two:
 exit_to_tier1:
     tstate->previous_executor = (PyObject *)current_executor;
     GOTO_TIER_ONE(_PyCode_CODE(_PyFrame_GetCode(frame)) + _target);
+exit_to_tier1_dynamic:
+    tstate->previous_executor = (PyObject *)current_executor;
+    GOTO_TIER_ONE(frame->instr_ptr);
 exit_to_trace:
     {
         _PyExitData *exit = &current_executor->exits[_exit_index];