git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-116168: Remove extra `_CHECK_STACK_SPACE` uops (#117242)
author: Peter Lazorchak <lazorchakp@gmail.com>
Wed, 3 Apr 2024 17:14:18 +0000 (10:14 -0700)
committer: GitHub <noreply@github.com>
Wed, 3 Apr 2024 17:14:18 +0000 (17:14 +0000)
This merges all `_CHECK_STACK_SPACE` uops in a trace into a single `_CHECK_STACK_SPACE_OPERAND` uop that checks whether there is enough stack space for all calls included in the entire trace.

Include/internal/pycore_uop_ids.h
Include/internal/pycore_uop_metadata.h
Lib/test/test_capi/test_opt.py
Modules/_testinternalcapi.c
Python/bytecodes.c
Python/executor_cases.c.h
Python/optimizer_analysis.c
Python/optimizer_cases.c.h

index 54dc6dcf4081161bce8f95f3ca6cc0098d4e9f73..3e4dd8b4009cd4d051ae657449135ebe9a87732c 100644 (file)
@@ -68,14 +68,15 @@ extern "C" {
 #define _CHECK_PEP_523 330
 #define _CHECK_PERIODIC 331
 #define _CHECK_STACK_SPACE 332
-#define _CHECK_VALIDITY 333
-#define _CHECK_VALIDITY_AND_SET_IP 334
-#define _COLD_EXIT 335
-#define _COMPARE_OP 336
-#define _COMPARE_OP_FLOAT 337
-#define _COMPARE_OP_INT 338
-#define _COMPARE_OP_STR 339
-#define _CONTAINS_OP 340
+#define _CHECK_STACK_SPACE_OPERAND 333
+#define _CHECK_VALIDITY 334
+#define _CHECK_VALIDITY_AND_SET_IP 335
+#define _COLD_EXIT 336
+#define _COMPARE_OP 337
+#define _COMPARE_OP_FLOAT 338
+#define _COMPARE_OP_INT 339
+#define _COMPARE_OP_STR 340
+#define _CONTAINS_OP 341
 #define _CONTAINS_OP_DICT CONTAINS_OP_DICT
 #define _CONTAINS_OP_SET CONTAINS_OP_SET
 #define _CONVERT_VALUE CONVERT_VALUE
@@ -87,47 +88,47 @@ extern "C" {
 #define _DELETE_GLOBAL DELETE_GLOBAL
 #define _DELETE_NAME DELETE_NAME
 #define _DELETE_SUBSCR DELETE_SUBSCR
-#define _DEOPT 341
+#define _DEOPT 342
 #define _DICT_MERGE DICT_MERGE
 #define _DICT_UPDATE DICT_UPDATE
 #define _END_SEND END_SEND
-#define _ERROR_POP_N 342
+#define _ERROR_POP_N 343
 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK
-#define _FATAL_ERROR 343
+#define _FATAL_ERROR 344
 #define _FORMAT_SIMPLE FORMAT_SIMPLE
 #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC
-#define _FOR_ITER 344
+#define _FOR_ITER 345
 #define _FOR_ITER_GEN FOR_ITER_GEN
-#define _FOR_ITER_TIER_TWO 345
+#define _FOR_ITER_TIER_TWO 346
 #define _GET_AITER GET_AITER
 #define _GET_ANEXT GET_ANEXT
 #define _GET_AWAITABLE GET_AWAITABLE
 #define _GET_ITER GET_ITER
 #define _GET_LEN GET_LEN
 #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER
-#define _GUARD_BOTH_FLOAT 346
-#define _GUARD_BOTH_INT 347
-#define _GUARD_BOTH_UNICODE 348
-#define _GUARD_BUILTINS_VERSION 349
-#define _GUARD_DORV_NO_DICT 350
-#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 351
-#define _GUARD_GLOBALS_VERSION 352
-#define _GUARD_IS_FALSE_POP 353
-#define _GUARD_IS_NONE_POP 354
-#define _GUARD_IS_NOT_NONE_POP 355
-#define _GUARD_IS_TRUE_POP 356
-#define _GUARD_KEYS_VERSION 357
-#define _GUARD_NOT_EXHAUSTED_LIST 358
-#define _GUARD_NOT_EXHAUSTED_RANGE 359
-#define _GUARD_NOT_EXHAUSTED_TUPLE 360
-#define _GUARD_TYPE_VERSION 361
-#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 362
-#define _INIT_CALL_PY_EXACT_ARGS 363
-#define _INIT_CALL_PY_EXACT_ARGS_0 364
-#define _INIT_CALL_PY_EXACT_ARGS_1 365
-#define _INIT_CALL_PY_EXACT_ARGS_2 366
-#define _INIT_CALL_PY_EXACT_ARGS_3 367
-#define _INIT_CALL_PY_EXACT_ARGS_4 368
+#define _GUARD_BOTH_FLOAT 347
+#define _GUARD_BOTH_INT 348
+#define _GUARD_BOTH_UNICODE 349
+#define _GUARD_BUILTINS_VERSION 350
+#define _GUARD_DORV_NO_DICT 351
+#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 352
+#define _GUARD_GLOBALS_VERSION 353
+#define _GUARD_IS_FALSE_POP 354
+#define _GUARD_IS_NONE_POP 355
+#define _GUARD_IS_NOT_NONE_POP 356
+#define _GUARD_IS_TRUE_POP 357
+#define _GUARD_KEYS_VERSION 358
+#define _GUARD_NOT_EXHAUSTED_LIST 359
+#define _GUARD_NOT_EXHAUSTED_RANGE 360
+#define _GUARD_NOT_EXHAUSTED_TUPLE 361
+#define _GUARD_TYPE_VERSION 362
+#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 363
+#define _INIT_CALL_PY_EXACT_ARGS 364
+#define _INIT_CALL_PY_EXACT_ARGS_0 365
+#define _INIT_CALL_PY_EXACT_ARGS_1 366
+#define _INIT_CALL_PY_EXACT_ARGS_2 367
+#define _INIT_CALL_PY_EXACT_ARGS_3 368
+#define _INIT_CALL_PY_EXACT_ARGS_4 369
 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL
 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX
 #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW
@@ -144,65 +145,65 @@ extern "C" {
 #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST
 #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE
 #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE
-#define _INTERNAL_INCREMENT_OPT_COUNTER 369
-#define _IS_NONE 370
+#define _INTERNAL_INCREMENT_OPT_COUNTER 370
+#define _IS_NONE 371
 #define _IS_OP IS_OP
-#define _ITER_CHECK_LIST 371
-#define _ITER_CHECK_RANGE 372
-#define _ITER_CHECK_TUPLE 373
-#define _ITER_JUMP_LIST 374
-#define _ITER_JUMP_RANGE 375
-#define _ITER_JUMP_TUPLE 376
-#define _ITER_NEXT_LIST 377
-#define _ITER_NEXT_RANGE 378
-#define _ITER_NEXT_TUPLE 379
-#define _JUMP_TO_TOP 380
+#define _ITER_CHECK_LIST 372
+#define _ITER_CHECK_RANGE 373
+#define _ITER_CHECK_TUPLE 374
+#define _ITER_JUMP_LIST 375
+#define _ITER_JUMP_RANGE 376
+#define _ITER_JUMP_TUPLE 377
+#define _ITER_NEXT_LIST 378
+#define _ITER_NEXT_RANGE 379
+#define _ITER_NEXT_TUPLE 380
+#define _JUMP_TO_TOP 381
 #define _LIST_APPEND LIST_APPEND
 #define _LIST_EXTEND LIST_EXTEND
 #define _LOAD_ASSERTION_ERROR LOAD_ASSERTION_ERROR
-#define _LOAD_ATTR 381
-#define _LOAD_ATTR_CLASS 382
-#define _LOAD_ATTR_CLASS_0 383
-#define _LOAD_ATTR_CLASS_1 384
+#define _LOAD_ATTR 382
+#define _LOAD_ATTR_CLASS 383
+#define _LOAD_ATTR_CLASS_0 384
+#define _LOAD_ATTR_CLASS_1 385
 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
-#define _LOAD_ATTR_INSTANCE_VALUE 385
-#define _LOAD_ATTR_INSTANCE_VALUE_0 386
-#define _LOAD_ATTR_INSTANCE_VALUE_1 387
-#define _LOAD_ATTR_METHOD_LAZY_DICT 388
-#define _LOAD_ATTR_METHOD_NO_DICT 389
-#define _LOAD_ATTR_METHOD_WITH_VALUES 390
-#define _LOAD_ATTR_MODULE 391
-#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 392
-#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 393
+#define _LOAD_ATTR_INSTANCE_VALUE 386
+#define _LOAD_ATTR_INSTANCE_VALUE_0 387
+#define _LOAD_ATTR_INSTANCE_VALUE_1 388
+#define _LOAD_ATTR_METHOD_LAZY_DICT 389
+#define _LOAD_ATTR_METHOD_NO_DICT 390
+#define _LOAD_ATTR_METHOD_WITH_VALUES 391
+#define _LOAD_ATTR_MODULE 392
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 393
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 394
 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY
-#define _LOAD_ATTR_SLOT 394
-#define _LOAD_ATTR_SLOT_0 395
-#define _LOAD_ATTR_SLOT_1 396
-#define _LOAD_ATTR_WITH_HINT 397
+#define _LOAD_ATTR_SLOT 395
+#define _LOAD_ATTR_SLOT_0 396
+#define _LOAD_ATTR_SLOT_1 397
+#define _LOAD_ATTR_WITH_HINT 398
 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
 #define _LOAD_CONST LOAD_CONST
-#define _LOAD_CONST_INLINE 398
-#define _LOAD_CONST_INLINE_BORROW 399
-#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 400
-#define _LOAD_CONST_INLINE_WITH_NULL 401
+#define _LOAD_CONST_INLINE 399
+#define _LOAD_CONST_INLINE_BORROW 400
+#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 401
+#define _LOAD_CONST_INLINE_WITH_NULL 402
 #define _LOAD_DEREF LOAD_DEREF
-#define _LOAD_FAST 402
-#define _LOAD_FAST_0 403
-#define _LOAD_FAST_1 404
-#define _LOAD_FAST_2 405
-#define _LOAD_FAST_3 406
-#define _LOAD_FAST_4 407
-#define _LOAD_FAST_5 408
-#define _LOAD_FAST_6 409
-#define _LOAD_FAST_7 410
+#define _LOAD_FAST 403
+#define _LOAD_FAST_0 404
+#define _LOAD_FAST_1 405
+#define _LOAD_FAST_2 406
+#define _LOAD_FAST_3 407
+#define _LOAD_FAST_4 408
+#define _LOAD_FAST_5 409
+#define _LOAD_FAST_6 410
+#define _LOAD_FAST_7 411
 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
 #define _LOAD_FAST_CHECK LOAD_FAST_CHECK
 #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
 #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 411
-#define _LOAD_GLOBAL_BUILTINS 412
-#define _LOAD_GLOBAL_MODULE 413
+#define _LOAD_GLOBAL 412
+#define _LOAD_GLOBAL_BUILTINS 413
+#define _LOAD_GLOBAL_MODULE 414
 #define _LOAD_LOCALS LOAD_LOCALS
 #define _LOAD_NAME LOAD_NAME
 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
@@ -216,49 +217,49 @@ extern "C" {
 #define _MATCH_SEQUENCE MATCH_SEQUENCE
 #define _NOP NOP
 #define _POP_EXCEPT POP_EXCEPT
-#define _POP_FRAME 414
-#define _POP_JUMP_IF_FALSE 415
-#define _POP_JUMP_IF_TRUE 416
+#define _POP_FRAME 415
+#define _POP_JUMP_IF_FALSE 416
+#define _POP_JUMP_IF_TRUE 417
 #define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 417
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 418
 #define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 418
+#define _PUSH_FRAME 419
 #define _PUSH_NULL PUSH_NULL
-#define _REPLACE_WITH_TRUE 419
+#define _REPLACE_WITH_TRUE 420
 #define _RESUME_CHECK RESUME_CHECK
-#define _SAVE_RETURN_OFFSET 420
-#define _SEND 421
+#define _SAVE_RETURN_OFFSET 421
+#define _SEND 422
 #define _SEND_GEN SEND_GEN
 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
 #define _SET_ADD SET_ADD
 #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
 #define _SET_UPDATE SET_UPDATE
-#define _SIDE_EXIT 422
-#define _START_EXECUTOR 423
-#define _STORE_ATTR 424
-#define _STORE_ATTR_INSTANCE_VALUE 425
-#define _STORE_ATTR_SLOT 426
+#define _SIDE_EXIT 423
+#define _START_EXECUTOR 424
+#define _STORE_ATTR 425
+#define _STORE_ATTR_INSTANCE_VALUE 426
+#define _STORE_ATTR_SLOT 427
 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT
 #define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 427
-#define _STORE_FAST_0 428
-#define _STORE_FAST_1 429
-#define _STORE_FAST_2 430
-#define _STORE_FAST_3 431
-#define _STORE_FAST_4 432
-#define _STORE_FAST_5 433
-#define _STORE_FAST_6 434
-#define _STORE_FAST_7 435
+#define _STORE_FAST 428
+#define _STORE_FAST_0 429
+#define _STORE_FAST_1 430
+#define _STORE_FAST_2 431
+#define _STORE_FAST_3 432
+#define _STORE_FAST_4 433
+#define _STORE_FAST_5 434
+#define _STORE_FAST_6 435
+#define _STORE_FAST_7 436
 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
 #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
 #define _STORE_GLOBAL STORE_GLOBAL
 #define _STORE_NAME STORE_NAME
 #define _STORE_SLICE STORE_SLICE
-#define _STORE_SUBSCR 436
+#define _STORE_SUBSCR 437
 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
 #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
 #define _SWAP SWAP
-#define _TO_BOOL 437
+#define _TO_BOOL 438
 #define _TO_BOOL_BOOL TO_BOOL_BOOL
 #define _TO_BOOL_INT TO_BOOL_INT
 #define _TO_BOOL_LIST TO_BOOL_LIST
@@ -268,12 +269,12 @@ extern "C" {
 #define _UNARY_NEGATIVE UNARY_NEGATIVE
 #define _UNARY_NOT UNARY_NOT
 #define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 438
+#define _UNPACK_SEQUENCE 439
 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
 #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
 #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
 #define _WITH_EXCEPT_START WITH_EXCEPT_START
-#define MAX_UOP_ID 438
+#define MAX_UOP_ID 439
 
 #ifdef __cplusplus
 }
index 0f2046fb3d0c3d4eb3c452d9c7c2d57bfac31aa9..111824a938f6cc67693455466e97fdb384d4d52b 100644 (file)
@@ -228,6 +228,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_GUARD_IS_NOT_NONE_POP] = HAS_EXIT_FLAG,
     [_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG,
     [_SET_IP] = 0,
+    [_CHECK_STACK_SPACE_OPERAND] = HAS_DEOPT_FLAG,
     [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
     [_EXIT_TRACE] = HAS_EXIT_FLAG,
     [_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
@@ -302,6 +303,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_CHECK_PEP_523] = "_CHECK_PEP_523",
     [_CHECK_PERIODIC] = "_CHECK_PERIODIC",
     [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE",
+    [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND",
     [_CHECK_VALIDITY] = "_CHECK_VALIDITY",
     [_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP",
     [_COLD_EXIT] = "_COLD_EXIT",
@@ -902,6 +904,8 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 0;
         case _SET_IP:
             return 0;
+        case _CHECK_STACK_SPACE_OPERAND:
+            return 0;
         case _SAVE_RETURN_OFFSET:
             return 0;
         case _EXIT_TRACE:
index b59f4b74a8593ee6c70e6247dfddc9c4651b1a09..ceb49c3c7129cb243d3a6707023907dddc862847 100644 (file)
@@ -952,6 +952,269 @@ class TestUopsOptimization(unittest.TestCase):
         _, ex = self._run_with_optimizer(testfunc, 16)
         self.assertIsNone(ex)
 
+    def test_combine_stack_space_checks_sequential(self):
+        def dummy12(x):
+            return x - 1
+        def dummy13(y):
+            z = y + 2
+            return y, z
+        def testfunc(n):
+            a = 0
+            for _ in range(n):
+                b = dummy12(7)
+                c, d = dummy13(9)
+                a += b + c + d
+            return a
+
+        res, ex = self._run_with_optimizer(testfunc, 32)
+        self.assertEqual(res, 832)
+        self.assertIsNotNone(ex)
+
+        uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
+        uop_names = [uop[0] for uop in uops_and_operands]
+        self.assertEqual(uop_names.count("_PUSH_FRAME"), 2)
+        self.assertEqual(uop_names.count("_POP_FRAME"), 2)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
+        # sequential calls: max(12, 13) == 13
+        largest_stack = _testinternalcapi.get_co_framesize(dummy13.__code__)
+        self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands)
+
+    def test_combine_stack_space_checks_nested(self):
+        def dummy12(x):
+            return x + 3
+        def dummy15(y):
+            z = dummy12(y)
+            return y, z
+        def testfunc(n):
+            a = 0
+            for _ in range(n):
+                b, c = dummy15(2)
+                a += b + c
+            return a
+
+        res, ex = self._run_with_optimizer(testfunc, 32)
+        self.assertEqual(res, 224)
+        self.assertIsNotNone(ex)
+
+        uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
+        uop_names = [uop[0] for uop in uops_and_operands]
+        self.assertEqual(uop_names.count("_PUSH_FRAME"), 2)
+        self.assertEqual(uop_names.count("_POP_FRAME"), 2)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
+        # nested calls: 15 + 12 == 27
+        largest_stack = (
+            _testinternalcapi.get_co_framesize(dummy15.__code__) +
+            _testinternalcapi.get_co_framesize(dummy12.__code__)
+        )
+        self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands)
+
+    def test_combine_stack_space_checks_several_calls(self):
+        def dummy12(x):
+            return x + 3
+        def dummy13(y):
+            z = y + 2
+            return y, z
+        def dummy18(y):
+            z = dummy12(y)
+            x, w = dummy13(z)
+            return z, x, w
+        def testfunc(n):
+            a = 0
+            for _ in range(n):
+                b = dummy12(5)
+                c, d, e = dummy18(2)
+                a += b + c + d + e
+            return a
+
+        res, ex = self._run_with_optimizer(testfunc, 32)
+        self.assertEqual(res, 800)
+        self.assertIsNotNone(ex)
+
+        uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
+        uop_names = [uop[0] for uop in uops_and_operands]
+        self.assertEqual(uop_names.count("_PUSH_FRAME"), 4)
+        self.assertEqual(uop_names.count("_POP_FRAME"), 4)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
+        # max(12, 18 + max(12, 13)) == 31
+        largest_stack = (
+            _testinternalcapi.get_co_framesize(dummy18.__code__) +
+            _testinternalcapi.get_co_framesize(dummy13.__code__)
+        )
+        self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands)
+
+    def test_combine_stack_space_checks_several_calls_different_order(self):
+        # same as `several_calls` but with top-level calls reversed
+        def dummy12(x):
+            return x + 3
+        def dummy13(y):
+            z = y + 2
+            return y, z
+        def dummy18(y):
+            z = dummy12(y)
+            x, w = dummy13(z)
+            return z, x, w
+        def testfunc(n):
+            a = 0
+            for _ in range(n):
+                c, d, e = dummy18(2)
+                b = dummy12(5)
+                a += b + c + d + e
+            return a
+
+        res, ex = self._run_with_optimizer(testfunc, 32)
+        self.assertEqual(res, 800)
+        self.assertIsNotNone(ex)
+
+        uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
+        uop_names = [uop[0] for uop in uops_and_operands]
+        self.assertEqual(uop_names.count("_PUSH_FRAME"), 4)
+        self.assertEqual(uop_names.count("_POP_FRAME"), 4)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
+        # max(18 + max(12, 13), 12) == 31
+        largest_stack = (
+            _testinternalcapi.get_co_framesize(dummy18.__code__) +
+            _testinternalcapi.get_co_framesize(dummy13.__code__)
+        )
+        self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands)
+
+    def test_combine_stack_space_complex(self):
+        def dummy0(x):
+            return x
+        def dummy1(x):
+            return dummy0(x)
+        def dummy2(x):
+            return dummy1(x)
+        def dummy3(x):
+            return dummy0(x)
+        def dummy4(x):
+            y = dummy0(x)
+            return dummy3(y)
+        def dummy5(x):
+            return dummy2(x)
+        def dummy6(x):
+            y = dummy5(x)
+            z = dummy0(y)
+            return dummy4(z)
+        def testfunc(n):
+            a = 0;
+            for _ in range(32):
+                b = dummy5(1)
+                c = dummy0(1)
+                d = dummy6(1)
+                a += b + c + d
+            return a
+
+        res, ex = self._run_with_optimizer(testfunc, 32)
+        self.assertEqual(res, 96)
+        self.assertIsNotNone(ex)
+
+        uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
+        uop_names = [uop[0] for uop in uops_and_operands]
+        self.assertEqual(uop_names.count("_PUSH_FRAME"), 15)
+        self.assertEqual(uop_names.count("_POP_FRAME"), 15)
+
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
+        largest_stack = (
+            _testinternalcapi.get_co_framesize(dummy6.__code__) +
+            _testinternalcapi.get_co_framesize(dummy5.__code__) +
+            _testinternalcapi.get_co_framesize(dummy2.__code__) +
+            _testinternalcapi.get_co_framesize(dummy1.__code__) +
+            _testinternalcapi.get_co_framesize(dummy0.__code__)
+        )
+        self.assertIn(
+            ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands
+        )
+
+    def test_combine_stack_space_checks_large_framesize(self):
+        # Create a function with a large framesize. This ensures _CHECK_STACK_SPACE is
+        # actually doing its job. Note that the resulting trace hits
+        # UOP_MAX_TRACE_LENGTH, but since all _CHECK_STACK_SPACEs happen early, this
+        # test is still meaningful.
+        repetitions = 10000
+        ns = {}
+        header = """
+            def dummy_large(a0):
+        """
+        body = "".join([f"""
+                a{n+1} = a{n} + 1
+        """ for n in range(repetitions)])
+        return_ = f"""
+                return a{repetitions-1}
+        """
+        exec(textwrap.dedent(header + body + return_), ns, ns)
+        dummy_large = ns['dummy_large']
+
+        # this is something like:
+        #
+        # def dummy_large(a0):
+        #     a1 = a0 + 1
+        #     a2 = a1 + 1
+        #     ....
+        #     a9999 = a9998 + 1
+        #     return a9999
+
+        def dummy15(z):
+            y = dummy_large(z)
+            return y + 3
+
+        def testfunc(n):
+            b = 0
+            for _ in range(n):
+                b += dummy15(7)
+            return b
+
+        res, ex = self._run_with_optimizer(testfunc, 32)
+        self.assertEqual(res, 32 * (repetitions + 9))
+        self.assertIsNotNone(ex)
+
+        uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
+        uop_names = [uop[0] for uop in uops_and_operands]
+        self.assertEqual(uop_names.count("_PUSH_FRAME"), 2)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
+
+        # this hits a different case during trace projection in refcount test runs only,
+        # so we need to account for both possibilities
+        self.assertIn(uop_names.count("_CHECK_STACK_SPACE"), [0, 1])
+        if uop_names.count("_CHECK_STACK_SPACE") == 0:
+            largest_stack = (
+                _testinternalcapi.get_co_framesize(dummy15.__code__) +
+                _testinternalcapi.get_co_framesize(dummy_large.__code__)
+            )
+        else:
+            largest_stack = _testinternalcapi.get_co_framesize(dummy15.__code__)
+        self.assertIn(
+            ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands
+        )
+
+    def test_combine_stack_space_checks_recursion(self):
+        def dummy15(x):
+            while x > 0:
+                return dummy15(x - 1)
+            return 42
+        def testfunc(n):
+            a = 0
+            for _ in range(n):
+                a += dummy15(n)
+            return a
+
+        res, ex = self._run_with_optimizer(testfunc, 32)
+        self.assertEqual(res, 42 * 32)
+        self.assertIsNotNone(ex)
+
+        uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex]
+        uop_names = [uop[0] for uop in uops_and_operands]
+        self.assertEqual(uop_names.count("_PUSH_FRAME"), 2)
+        self.assertEqual(uop_names.count("_POP_FRAME"), 0)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 1)
+        self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1)
+        largest_stack = _testinternalcapi.get_co_framesize(dummy15.__code__)
+        self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands)
+
     def test_many_nested(self):
         # overflow the trace_stack
         def dummy_a(x):
@@ -976,8 +1239,9 @@ class TestUopsOptimization(unittest.TestCase):
                 a += dummy_h(n)
             return a
 
-        self._run_with_optimizer(testfunc, 32)
-
+        res, ex = self._run_with_optimizer(testfunc, 32)
+        self.assertEqual(res, 32 * 32)
+        self.assertIsNone(ex)
 
 if __name__ == "__main__":
     unittest.main()
index c5d65a373906f26474f5275c04b578fa4564f213..6b5d99f6ffac1fad88bc66ee6536f13ed4265e2e 100644 (file)
@@ -959,6 +959,17 @@ iframe_getlasti(PyObject *self, PyObject *frame)
     return PyLong_FromLong(PyUnstable_InterpreterFrame_GetLasti(f));
 }
 
+static PyObject *
+get_co_framesize(PyObject *self, PyObject *arg)
+{
+    if (!PyCode_Check(arg)) {
+        PyErr_SetString(PyExc_TypeError, "argument must be a code object");
+        return NULL;
+    }
+    PyCodeObject *code = (PyCodeObject *)arg;
+    return PyLong_FromLong(code->co_framesize);
+}
+
 static PyObject *
 new_counter_optimizer(PyObject *self, PyObject *arg)
 {
@@ -1715,6 +1726,7 @@ static PyMethodDef module_functions[] = {
     {"iframe_getcode", iframe_getcode, METH_O, NULL},
     {"iframe_getline", iframe_getline, METH_O, NULL},
     {"iframe_getlasti", iframe_getlasti, METH_O, NULL},
+    {"get_co_framesize", get_co_framesize, METH_O, NULL},
     {"get_optimizer", get_optimizer,  METH_NOARGS, NULL},
     {"set_optimizer", set_optimizer,  METH_O, NULL},
     {"new_counter_optimizer", new_counter_optimizer, METH_NOARGS, NULL},
index ce208aac9c795336455fe23956f8ae624a45f72f..fa53c969fe361eb9f7f5e04e52122e9883f6fc2f 100644 (file)
@@ -4094,6 +4094,12 @@ dummy_func(
             frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr;
         }
 
+        tier2 op(_CHECK_STACK_SPACE_OPERAND, (framesize/2 --)) {
+            assert(framesize <= INT_MAX);
+            DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, framesize));
+            DEOPT_IF(tstate->py_recursion_remaining <= 1);
+        }
+
         op(_SAVE_RETURN_OFFSET, (--)) {
             #if TIER_ONE
             frame->return_offset = (uint16_t)(next_instr - this_instr);
index 82f2171f1ede83570fc211b9f441aa76fda496b5..98476798fbbbdf88b8407c0c511978b20b0563b9 100644 (file)
             break;
         }
 
+        case _CHECK_STACK_SPACE_OPERAND: {
+            uint32_t framesize = (uint32_t)CURRENT_OPERAND();
+            assert(framesize <= INT_MAX);
+            if (!_PyThreadState_HasStackSpace(tstate, framesize)) JUMP_TO_JUMP_TARGET();
+            if (tstate->py_recursion_remaining <= 1) JUMP_TO_JUMP_TARGET();
+            break;
+        }
+
         case _SAVE_RETURN_OFFSET: {
             oparg = CURRENT_OPARG();
             #if TIER_ONE
index 6f553f8ab8ad2e7668185108cc2f361af64c8e26..a21679f366a74ed5a2f6e14067e006d68cc34c65 100644 (file)
@@ -529,14 +529,41 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
             }
         }
     }
-    Py_FatalError("No terminating instruction");
     Py_UNREACHABLE();
 }
 
+/* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a
+ * PyCodeObject *. Retrieve the code object if possible.
+ */
+static PyCodeObject *
+get_co(_PyUOpInstruction *op)
+{
+    assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
+    PyCodeObject *co = NULL;
+    uint64_t operand = op->operand;
+    if (operand == 0) {
+        return NULL;
+    }
+    if (operand & 1) {
+        co = (PyCodeObject *)(operand & ~1);
+    }
+    else {
+        PyFunctionObject *func = (PyFunctionObject *)operand;
+        assert(PyFunction_Check(func));
+        co = (PyCodeObject *)func->func_code;
+    }
+    assert(PyCode_Check(co));
+    return co;
+}
+
 static void
 peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
 {
     PyCodeObject *co = _PyFrame_GetCode(frame);
+    int curr_space = 0;
+    int max_space = 0;
+    _PyUOpInstruction *first_valid_check_stack = NULL;
+    _PyUOpInstruction *corresponding_check_stack = NULL;
     for (int pc = 0; pc < buffer_size; pc++) {
         int opcode = buffer[pc].opcode;
         switch(opcode) {
@@ -547,8 +574,7 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
                 buffer[pc].operand = (uintptr_t)val;
                 break;
             }
-            case _CHECK_PEP_523:
-            {
+            case _CHECK_PEP_523: {
                 /* Setting the eval frame function invalidates
                  * all executors, so no need to check dynamically */
                 if (_PyInterpreterState_GET()->eval_frame == NULL) {
@@ -556,29 +582,72 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
                 }
                 break;
             }
-            case _PUSH_FRAME:
-            case _POP_FRAME:
-            {
-                uint64_t operand = buffer[pc].operand;
-                if (operand & 1) {
-                    co = (PyCodeObject *)(operand & ~1);
-                    assert(PyCode_Check(co));
-                }
-                else if (operand == 0) {
-                    co = NULL;
+            case _CHECK_STACK_SPACE: {
+                assert(corresponding_check_stack == NULL);
+                corresponding_check_stack = &buffer[pc];
+                break;
+            }
+            case _PUSH_FRAME: {
+                assert(corresponding_check_stack != NULL);
+                co = get_co(&buffer[pc]);
+                if (co == NULL) {
+                    // should be about to _EXIT_TRACE anyway
+                    goto finish;
+                }
+                int framesize = co->co_framesize;
+                assert(framesize > 0);
+                curr_space += framesize;
+                if (curr_space < 0 || curr_space > INT32_MAX) {
+                    // won't fit in signed 32-bit int
+                    goto finish;
+                }
+                max_space = curr_space > max_space ? curr_space : max_space;
+                if (first_valid_check_stack == NULL) {
+                    first_valid_check_stack = corresponding_check_stack;
                 }
                 else {
-                    PyFunctionObject *func = (PyFunctionObject *)operand;
-                    assert(PyFunction_Check(func));
-                    co = (PyCodeObject *)func->func_code;
+                    // delete all but the first valid _CHECK_STACK_SPACE
+                    corresponding_check_stack->opcode = _NOP;
+                }
+                corresponding_check_stack = NULL;
+                break;
+            }
+            case _POP_FRAME: {
+                assert(corresponding_check_stack == NULL);
+                assert(co != NULL);
+                int framesize = co->co_framesize;
+                assert(framesize > 0);
+                assert(framesize <= curr_space);
+                curr_space -= framesize;
+                co = get_co(&buffer[pc]);
+                if (co == NULL) {
+                    // might be impossible, but bailing is still safe
+                    goto finish;
                 }
                 break;
             }
             case _JUMP_TO_TOP:
             case _EXIT_TRACE:
-                return;
+                goto finish;
+#ifdef Py_DEBUG
+            case _CHECK_STACK_SPACE_OPERAND: {
+                /* We should never see _CHECK_STACK_SPACE_OPERANDs.
+                 * They are only created at the end of this pass. */
+                Py_UNREACHABLE();
+            }
+#endif
         }
     }
+    Py_UNREACHABLE();
+finish:
+    if (first_valid_check_stack != NULL) {
+        assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
+        assert(max_space > 0);
+        assert(max_space <= INT_MAX);
+        assert(max_space <= INT32_MAX);
+        first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
+        first_valid_check_stack->operand = max_space;
+    }
 }
 
 //  0 - failure, no error raised, just fall back to Tier 1
index b4a1da8aec14afd0d56ebf9763b75b55366fa662..209be370c4aa38373aa9dd005e3d38c8cf80e4b9 100644 (file)
             break;
         }
 
+        case _CHECK_STACK_SPACE_OPERAND: {
+            break;
+        }
+
         case _SAVE_RETURN_OFFSET: {
             break;
         }