From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sat, 1 Nov 2025 15:36:10 +0000 (+0000) Subject: cold dynamic executors X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=aa92d84c39aa21bd4ef9d22c5411c4334f0b096c;p=thirdparty%2FPython%2Fcpython.git cold dynamic executors --- diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 546cc98d166c..e37cecdd1d54 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -962,6 +962,7 @@ struct _is { struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *cold_executor; + struct _PyExecutorObject *cold_dynamic_executor; int executor_deletion_list_remaining_capacity; size_t executor_creation_counter; _rare_events rare_events; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 2f51485e93fc..3ea62eab1f46 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -37,6 +37,7 @@ typedef struct { typedef struct _PyExitData { uint32_t target; uint16_t index; + char is_dynamic; _Py_BackoffCounter temperature; struct _PyExecutorObject *executor; } _PyExitData; @@ -340,6 +341,7 @@ static inline _PyExecutorObject *_PyExecutor_FromExit(_PyExitData *exit) } extern _PyExecutorObject *_PyExecutor_GetColdExecutor(void); +extern _PyExecutorObject *_PyExecutor_GetColdDynamicExecutor(void); PyAPI_FUNC(void) _PyExecutor_ClearExit(_PyExitData *exit); @@ -366,7 +368,7 @@ int _PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *curr_instr, _Py_CODEUNIT *insert_exec_instr, _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth, _PyExitData *exit, - _PyExecutorObject *prev_exec, int oparg); + _PyExecutorObject *prev_exec, int oparg, bool is_dynamic_target); void _PyJit_FinalizeTracing(PyThreadState *tstate); diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 52aae3b3af64..a390be53e433 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -81,106 +81,109 @@ extern "C" { #define _CHECK_STACK_SPACE 357 #define _CHECK_STACK_SPACE_OPERAND 358 #define _CHECK_VALIDITY 359 -#define _COLD_EXIT 360 -#define _COMPARE_OP 361 -#define _COMPARE_OP_FLOAT 362 -#define _COMPARE_OP_INT 363 -#define _COMPARE_OP_STR 364 -#define _CONTAINS_OP 365 -#define _CONTAINS_OP_DICT 366 -#define _CONTAINS_OP_SET 367 +#define _COLD_DYNAMIC_EXIT 360 +#define _COLD_EXIT 361 +#define _COMPARE_OP 362 +#define _COMPARE_OP_FLOAT 363 +#define _COMPARE_OP_INT 364 +#define _COMPARE_OP_STR 365 +#define _CONTAINS_OP 366 +#define _CONTAINS_OP_DICT 367 +#define _CONTAINS_OP_SET 368 #define _CONVERT_VALUE CONVERT_VALUE -#define _COPY 368 -#define _COPY_1 369 -#define _COPY_2 370 -#define _COPY_3 371 +#define _COPY 369 +#define _COPY_1 370 +#define _COPY_2 371 +#define _COPY_3 372 #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 372 +#define _CREATE_INIT_FRAME 373 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 373 +#define _DEOPT 374 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 374 -#define _DO_CALL_FUNCTION_EX 375 -#define _DO_CALL_KW 376 -#define _DYNAMIC_EXIT 377 +#define _DO_CALL 375 +#define _DO_CALL_FUNCTION_EX 376 +#define _DO_CALL_KW 377 +#define _DYNAMIC_DEOPT 378 +#define _DYNAMIC_EXIT 379 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 378 +#define _ERROR_POP_N 380 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 379 -#define _EXPAND_METHOD_KW 380 -#define _FATAL_ERROR 381 +#define _EXPAND_METHOD 381 +#define _EXPAND_METHOD_KW 382 +#define _FATAL_ERROR 383 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 382 -#define _FOR_ITER_GEN_FRAME 383 -#define _FOR_ITER_TIER_TWO 384 +#define _FOR_ITER 384 +#define _FOR_ITER_GEN_FRAME 385 +#define _FOR_ITER_TIER_TWO 386 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 385 -#define _GUARD_CALLABLE_ISINSTANCE 386 -#define _GUARD_CALLABLE_LEN 387 -#define _GUARD_CALLABLE_LIST_APPEND 388 -#define _GUARD_CALLABLE_STR_1 389 -#define _GUARD_CALLABLE_TUPLE_1 390 -#define _GUARD_CALLABLE_TYPE_1 391 -#define _GUARD_DORV_NO_DICT 392 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 393 -#define _GUARD_GLOBALS_VERSION 394 -#define _GUARD_IP_RETURN_GENERATOR 395 -#define _GUARD_IP_RETURN_VALUE 396 -#define _GUARD_IP_YIELD_VALUE 397 -#define _GUARD_IP__PUSH_FRAME 398 -#define _GUARD_IS_FALSE_POP 399 -#define _GUARD_IS_NONE_POP 400 -#define _GUARD_IS_NOT_NONE_POP 401 -#define _GUARD_IS_TRUE_POP 402 -#define _GUARD_KEYS_VERSION 403 -#define _GUARD_NOS_DICT 404 -#define _GUARD_NOS_FLOAT 405 -#define _GUARD_NOS_INT 406 -#define _GUARD_NOS_LIST 407 -#define _GUARD_NOS_NOT_NULL 408 -#define _GUARD_NOS_NULL 409 -#define _GUARD_NOS_OVERFLOWED 410 -#define _GUARD_NOS_TUPLE 411 -#define _GUARD_NOS_UNICODE 412 -#define _GUARD_NOT_EXHAUSTED_LIST 413 -#define _GUARD_NOT_EXHAUSTED_RANGE 414 -#define _GUARD_NOT_EXHAUSTED_TUPLE 415 -#define _GUARD_THIRD_NULL 416 -#define _GUARD_TOS_ANY_SET 417 -#define _GUARD_TOS_DICT 418 -#define _GUARD_TOS_FLOAT 419 -#define _GUARD_TOS_INT 420 -#define _GUARD_TOS_LIST 421 -#define _GUARD_TOS_OVERFLOWED 422 -#define _GUARD_TOS_SLICE 423 -#define _GUARD_TOS_TUPLE 424 -#define _GUARD_TOS_UNICODE 425 -#define _GUARD_TYPE_VERSION 426 -#define _GUARD_TYPE_VERSION_AND_LOCK 427 -#define _HANDLE_PENDING_AND_DEOPT 428 +#define _GUARD_BINARY_OP_EXTEND 387 +#define _GUARD_CALLABLE_ISINSTANCE 388 +#define _GUARD_CALLABLE_LEN 389 +#define _GUARD_CALLABLE_LIST_APPEND 390 +#define _GUARD_CALLABLE_STR_1 391 +#define _GUARD_CALLABLE_TUPLE_1 392 +#define _GUARD_CALLABLE_TYPE_1 393 +#define _GUARD_DORV_NO_DICT 394 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 395 +#define _GUARD_EXECUTOR_IP 396 +#define _GUARD_GLOBALS_VERSION 397 +#define _GUARD_IP_RETURN_GENERATOR 398 +#define _GUARD_IP_RETURN_VALUE 399 +#define _GUARD_IP_YIELD_VALUE 400 +#define _GUARD_IP__PUSH_FRAME 401 +#define _GUARD_IS_FALSE_POP 402 +#define _GUARD_IS_NONE_POP 403 +#define _GUARD_IS_NOT_NONE_POP 404 +#define _GUARD_IS_TRUE_POP 405 +#define _GUARD_KEYS_VERSION 406 +#define _GUARD_NOS_DICT 407 +#define _GUARD_NOS_FLOAT 408 +#define _GUARD_NOS_INT 409 +#define _GUARD_NOS_LIST 410 +#define _GUARD_NOS_NOT_NULL 411 +#define _GUARD_NOS_NULL 412 +#define _GUARD_NOS_OVERFLOWED 413 +#define _GUARD_NOS_TUPLE 414 +#define _GUARD_NOS_UNICODE 415 +#define _GUARD_NOT_EXHAUSTED_LIST 416 +#define _GUARD_NOT_EXHAUSTED_RANGE 417 +#define _GUARD_NOT_EXHAUSTED_TUPLE 418 +#define _GUARD_THIRD_NULL 419 +#define _GUARD_TOS_ANY_SET 420 +#define _GUARD_TOS_DICT 421 +#define _GUARD_TOS_FLOAT 422 +#define _GUARD_TOS_INT 423 +#define _GUARD_TOS_LIST 424 +#define _GUARD_TOS_OVERFLOWED 425 +#define _GUARD_TOS_SLICE 426 +#define _GUARD_TOS_TUPLE 427 +#define _GUARD_TOS_UNICODE 428 +#define _GUARD_TYPE_VERSION 429 +#define _GUARD_TYPE_VERSION_AND_LOCK 430 +#define _HANDLE_PENDING_AND_DEOPT 431 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 429 -#define _INIT_CALL_PY_EXACT_ARGS 430 -#define _INIT_CALL_PY_EXACT_ARGS_0 431 -#define _INIT_CALL_PY_EXACT_ARGS_1 432 -#define _INIT_CALL_PY_EXACT_ARGS_2 433 -#define _INIT_CALL_PY_EXACT_ARGS_3 434 -#define _INIT_CALL_PY_EXACT_ARGS_4 435 -#define _INSERT_NULL 436 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 432 +#define _INIT_CALL_PY_EXACT_ARGS 433 +#define _INIT_CALL_PY_EXACT_ARGS_0 434 +#define _INIT_CALL_PY_EXACT_ARGS_1 435 +#define _INIT_CALL_PY_EXACT_ARGS_2 436 +#define _INIT_CALL_PY_EXACT_ARGS_3 437 +#define _INIT_CALL_PY_EXACT_ARGS_4 438 +#define _INSERT_NULL 439 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -190,178 +193,179 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 437 +#define _IS_NONE 440 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 438 -#define _ITER_CHECK_RANGE 439 -#define _ITER_CHECK_TUPLE 440 -#define _ITER_JUMP_LIST 441 -#define _ITER_JUMP_RANGE 442 -#define _ITER_JUMP_TUPLE 443 -#define _ITER_NEXT_LIST 444 -#define _ITER_NEXT_LIST_TIER_TWO 445 -#define _ITER_NEXT_RANGE 446 -#define _ITER_NEXT_TUPLE 447 +#define _ITER_CHECK_LIST 441 +#define _ITER_CHECK_RANGE 442 +#define _ITER_CHECK_TUPLE 443 +#define _ITER_JUMP_LIST 444 +#define _ITER_JUMP_RANGE 445 +#define _ITER_JUMP_TUPLE 446 +#define _ITER_NEXT_LIST 447 +#define _ITER_NEXT_LIST_TIER_TWO 448 +#define _ITER_NEXT_RANGE 449 +#define _ITER_NEXT_TUPLE 450 #define _JUMP_BACKWARD_NO_INTERRUPT JUMP_BACKWARD_NO_INTERRUPT -#define _JUMP_TO_TOP 448 +#define _JUMP_TO_TOP 451 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 449 -#define _LOAD_ATTR_CLASS 450 +#define _LOAD_ATTR 452 +#define _LOAD_ATTR_CLASS 453 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 451 -#define _LOAD_ATTR_METHOD_LAZY_DICT 452 -#define _LOAD_ATTR_METHOD_NO_DICT 453 -#define _LOAD_ATTR_METHOD_WITH_VALUES 454 -#define _LOAD_ATTR_MODULE 455 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 456 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 457 -#define _LOAD_ATTR_PROPERTY_FRAME 458 -#define _LOAD_ATTR_SLOT 459 -#define _LOAD_ATTR_WITH_HINT 460 +#define _LOAD_ATTR_INSTANCE_VALUE 454 +#define _LOAD_ATTR_METHOD_LAZY_DICT 455 +#define _LOAD_ATTR_METHOD_NO_DICT 456 +#define _LOAD_ATTR_METHOD_WITH_VALUES 457 +#define _LOAD_ATTR_MODULE 458 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 459 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 460 +#define _LOAD_ATTR_PROPERTY_FRAME 461 +#define _LOAD_ATTR_SLOT 462 +#define _LOAD_ATTR_WITH_HINT 463 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 461 +#define _LOAD_BYTECODE 464 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 462 -#define _LOAD_CONST_INLINE_BORROW 463 -#define _LOAD_CONST_UNDER_INLINE 464 -#define _LOAD_CONST_UNDER_INLINE_BORROW 465 +#define _LOAD_CONST_INLINE 465 +#define _LOAD_CONST_INLINE_BORROW 466 +#define _LOAD_CONST_UNDER_INLINE 467 +#define _LOAD_CONST_UNDER_INLINE_BORROW 468 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 466 -#define _LOAD_FAST_0 467 -#define _LOAD_FAST_1 468 -#define _LOAD_FAST_2 469 -#define _LOAD_FAST_3 470 -#define _LOAD_FAST_4 471 -#define _LOAD_FAST_5 472 -#define _LOAD_FAST_6 473 -#define _LOAD_FAST_7 474 +#define _LOAD_FAST 469 +#define _LOAD_FAST_0 470 +#define _LOAD_FAST_1 471 +#define _LOAD_FAST_2 472 +#define _LOAD_FAST_3 473 +#define _LOAD_FAST_4 474 +#define _LOAD_FAST_5 475 +#define _LOAD_FAST_6 476 +#define _LOAD_FAST_7 477 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 475 -#define _LOAD_FAST_BORROW_0 476 -#define _LOAD_FAST_BORROW_1 477 -#define _LOAD_FAST_BORROW_2 478 -#define _LOAD_FAST_BORROW_3 479 -#define _LOAD_FAST_BORROW_4 480 -#define _LOAD_FAST_BORROW_5 481 -#define _LOAD_FAST_BORROW_6 482 -#define _LOAD_FAST_BORROW_7 483 +#define _LOAD_FAST_BORROW 478 +#define _LOAD_FAST_BORROW_0 479 +#define _LOAD_FAST_BORROW_1 480 +#define _LOAD_FAST_BORROW_2 481 +#define _LOAD_FAST_BORROW_3 482 +#define _LOAD_FAST_BORROW_4 483 +#define _LOAD_FAST_BORROW_5 484 +#define _LOAD_FAST_BORROW_6 485 +#define _LOAD_FAST_BORROW_7 486 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 484 -#define _LOAD_GLOBAL_BUILTINS 485 -#define _LOAD_GLOBAL_MODULE 486 +#define _LOAD_GLOBAL 487 +#define _LOAD_GLOBAL_BUILTINS 488 +#define _LOAD_GLOBAL_MODULE 489 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 487 -#define _LOAD_SMALL_INT_0 488 -#define _LOAD_SMALL_INT_1 489 -#define _LOAD_SMALL_INT_2 490 -#define _LOAD_SMALL_INT_3 491 -#define _LOAD_SPECIAL 492 +#define _LOAD_SMALL_INT 490 +#define _LOAD_SMALL_INT_0 491 +#define _LOAD_SMALL_INT_1 492 +#define _LOAD_SMALL_INT_2 493 +#define _LOAD_SMALL_INT_3 494 +#define _LOAD_SPECIAL 495 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 493 +#define _MAKE_CALLARGS_A_TUPLE 496 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 494 +#define _MAKE_WARM 497 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 495 -#define _MAYBE_EXPAND_METHOD_KW 496 -#define _MONITOR_CALL 497 -#define _MONITOR_CALL_KW 498 -#define _MONITOR_JUMP_BACKWARD 499 -#define _MONITOR_RESUME 500 +#define _MAYBE_EXPAND_METHOD 498 +#define _MAYBE_EXPAND_METHOD_KW 499 +#define _MONITOR_CALL 500 +#define _MONITOR_CALL_KW 501 +#define _MONITOR_JUMP_BACKWARD 502 +#define _MONITOR_RESUME 503 #define _NOP NOP -#define _POP_CALL 501 -#define _POP_CALL_LOAD_CONST_INLINE_BORROW 502 -#define _POP_CALL_ONE 503 -#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 504 -#define _POP_CALL_TWO 505 -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 506 +#define _POP_CALL 504 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 505 +#define _POP_CALL_ONE 506 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 507 +#define _POP_CALL_TWO 508 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 509 #define _POP_EXCEPT POP_EXCEPT #define _POP_ITER POP_ITER -#define _POP_JUMP_IF_FALSE 507 -#define _POP_JUMP_IF_TRUE 508 +#define _POP_JUMP_IF_FALSE 510 +#define _POP_JUMP_IF_TRUE 511 #define _POP_TOP POP_TOP -#define _POP_TOP_FLOAT 509 -#define _POP_TOP_INT 510 -#define _POP_TOP_LOAD_CONST_INLINE 511 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 512 -#define _POP_TOP_NOP 513 -#define _POP_TOP_UNICODE 514 -#define _POP_TWO 515 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 516 +#define _POP_TOP_FLOAT 512 +#define _POP_TOP_INT 513 +#define _POP_TOP_LOAD_CONST_INLINE 514 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 515 +#define _POP_TOP_NOP 516 +#define _POP_TOP_UNICODE 517 +#define _POP_TWO 518 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 519 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 517 +#define _PUSH_FRAME 520 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 518 -#define _PY_FRAME_GENERAL 519 -#define _PY_FRAME_KW 520 -#define _QUICKEN_RESUME 521 -#define _REPLACE_WITH_TRUE 522 +#define _PUSH_NULL_CONDITIONAL 521 +#define _PY_FRAME_GENERAL 522 +#define _PY_FRAME_KW 523 +#define _QUICKEN_RESUME 524 +#define _REPLACE_WITH_TRUE 525 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 523 -#define _SEND 524 -#define _SEND_GEN_FRAME 525 +#define _SAVE_RETURN_OFFSET 526 +#define _SEND 527 +#define _SEND_GEN_FRAME 528 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 526 -#define _STORE_ATTR 527 -#define _STORE_ATTR_INSTANCE_VALUE 528 -#define _STORE_ATTR_SLOT 529 -#define _STORE_ATTR_WITH_HINT 530 +#define _START_DYNAMIC_EXECUTOR 529 +#define _START_EXECUTOR 530 +#define _STORE_ATTR 531 +#define _STORE_ATTR_INSTANCE_VALUE 532 +#define _STORE_ATTR_SLOT 533 +#define _STORE_ATTR_WITH_HINT 534 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 531 -#define _STORE_FAST_0 532 -#define _STORE_FAST_1 533 -#define _STORE_FAST_2 534 -#define _STORE_FAST_3 535 -#define _STORE_FAST_4 536 -#define _STORE_FAST_5 537 -#define _STORE_FAST_6 538 -#define _STORE_FAST_7 539 +#define _STORE_FAST 535 +#define _STORE_FAST_0 536 +#define _STORE_FAST_1 537 +#define _STORE_FAST_2 538 +#define _STORE_FAST_3 539 +#define _STORE_FAST_4 540 +#define _STORE_FAST_5 541 +#define _STORE_FAST_6 542 +#define _STORE_FAST_7 543 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 540 -#define _STORE_SUBSCR 541 -#define _STORE_SUBSCR_DICT 542 -#define _STORE_SUBSCR_LIST_INT 543 -#define _SWAP 544 -#define _SWAP_2 545 -#define _SWAP_3 546 -#define _TIER2_RESUME_CHECK 547 -#define _TO_BOOL 548 +#define _STORE_SLICE 544 +#define _STORE_SUBSCR 545 +#define _STORE_SUBSCR_DICT 546 +#define _STORE_SUBSCR_LIST_INT 547 +#define _SWAP 548 +#define _SWAP_2 549 +#define _SWAP_3 550 +#define _TIER2_RESUME_CHECK 551 +#define _TO_BOOL 552 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 549 +#define _TO_BOOL_LIST 553 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 550 +#define _TO_BOOL_STR 554 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 551 -#define _UNPACK_SEQUENCE_LIST 552 -#define _UNPACK_SEQUENCE_TUPLE 553 -#define _UNPACK_SEQUENCE_TWO_TUPLE 554 +#define _UNPACK_SEQUENCE 555 +#define _UNPACK_SEQUENCE_LIST 556 +#define _UNPACK_SEQUENCE_TUPLE 557 +#define _UNPACK_SEQUENCE_TWO_TUPLE 558 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 554 +#define MAX_UOP_ID 558 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 148b3d9c1fc9..348ea644eee2 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -315,6 +315,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_STACK_SPACE_OPERAND] = HAS_DEOPT_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_ESCAPES_FLAG, + [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, + [_DYNAMIC_DEOPT] = 0, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -330,6 +332,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_UNDER_INLINE] = 0, [_LOAD_CONST_UNDER_INLINE_BORROW] = 0, [_START_EXECUTOR] = HAS_DEOPT_FLAG, + [_START_DYNAMIC_EXECUTOR] = HAS_DEOPT_FLAG, [_MAKE_WARM] = 0, [_FATAL_ERROR] = 0, [_DEOPT] = 0, @@ -337,11 +340,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_ERROR_POP_N] = HAS_ARG_FLAG, [_TIER2_RESUME_CHECK] = HAS_PERIODIC_FLAG, [_COLD_EXIT] = 0, + [_COLD_DYNAMIC_EXIT] = 0, + [_GUARD_EXECUTOR_IP] = HAS_EXIT_FLAG, [_GUARD_IP__PUSH_FRAME] = HAS_EXIT_FLAG, [_GUARD_IP_YIELD_VALUE] = HAS_EXIT_FLAG, [_GUARD_IP_RETURN_VALUE] = HAS_EXIT_FLAG, [_GUARD_IP_RETURN_GENERATOR] = HAS_EXIT_FLAG, - [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, }; const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = { @@ -424,6 +428,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", + [_COLD_DYNAMIC_EXIT] = "_COLD_DYNAMIC_EXIT", [_COLD_EXIT] = "_COLD_EXIT", [_COMPARE_OP] = "_COMPARE_OP", [_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT", @@ -448,6 +453,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_DEOPT] = "_DEOPT", [_DICT_MERGE] = "_DICT_MERGE", [_DICT_UPDATE] = "_DICT_UPDATE", + [_DYNAMIC_DEOPT] = "_DYNAMIC_DEOPT", [_DYNAMIC_EXIT] = "_DYNAMIC_EXIT", [_END_FOR] = "_END_FOR", [_END_SEND] = "_END_SEND", @@ -476,6 +482,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_CALLABLE_TYPE_1] = "_GUARD_CALLABLE_TYPE_1", [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", + [_GUARD_EXECUTOR_IP] = "_GUARD_EXECUTOR_IP", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", [_GUARD_IP_RETURN_GENERATOR] = "_GUARD_IP_RETURN_GENERATOR", [_GUARD_IP_RETURN_VALUE] = "_GUARD_IP_RETURN_VALUE", @@ -634,6 +641,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_SET_FUNCTION_ATTRIBUTE] = "_SET_FUNCTION_ATTRIBUTE", [_SET_IP] = "_SET_IP", [_SET_UPDATE] = "_SET_UPDATE", + [_START_DYNAMIC_EXECUTOR] = "_START_DYNAMIC_EXECUTOR", [_START_EXECUTOR] = "_START_EXECUTOR", [_STORE_ATTR] = "_STORE_ATTR", [_STORE_ATTR_INSTANCE_VALUE] = "_STORE_ATTR_INSTANCE_VALUE", @@ -1271,6 +1279,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _EXIT_TRACE: return 0; + case _DYNAMIC_EXIT: + return 0; + case _DYNAMIC_DEOPT: + return 0; case _CHECK_VALIDITY: return 0; case _LOAD_CONST_INLINE: @@ -1301,6 +1313,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _START_EXECUTOR: return 0; + case _START_DYNAMIC_EXECUTOR: + return 0; case _MAKE_WARM: return 0; case _FATAL_ERROR: @@ -1315,6 +1329,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _COLD_EXIT: return 0; + case _COLD_DYNAMIC_EXIT: + return 0; + case _GUARD_EXECUTOR_IP: + return 0; case _GUARD_IP__PUSH_FRAME: return 0; case _GUARD_IP_YIELD_VALUE: @@ -1323,8 +1341,6 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _GUARD_IP_RETURN_GENERATOR: return 0; - case _DYNAMIC_EXIT: - return 0; default: return -1; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b5d4468fe94f..8c73f35acfe4 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2984,7 +2984,7 @@ dummy_func( oparg >>= 8; insert_exec_at--; } - int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, NULL, oparg); + int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, NULL, oparg, false); if (succ) { ENTER_TRACING(); } @@ -5283,6 +5283,32 @@ dummy_func( TIER2_TO_TIER2(exit->executor); } + // Note: this is different than _COLD_EXIT/_EXIT_TRACE, as it may lead to multiple executors + // from a single exit! + tier2 op(_DYNAMIC_EXIT, (exit_p/4 --)) { + _PyExitData *exit = (_PyExitData *)exit_p; + #if defined(Py_DEBUG) && !defined(_Py_JIT) + _Py_CODEUNIT *target = frame->instr_ptr; + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + if (frame->lltrace >= 2) { + printf("DYNAMIC EXIT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(", exit %tu, temp %d, target %d -> %s]\n", + exit - current_executor->exits, exit->temperature.value_and_backoff, + (int)(target - _PyFrame_GetBytecode(frame)), + _PyOpcode_OpName[target->op.code]); + } + #endif + tstate->jit_exit = exit; + assert(exit->is_dynamic); + _PyExecutorObject *exec = exit->executor; + TIER2_TO_TIER2(exec); + } + + tier2 op(_DYNAMIC_DEOPT, (--)) { + GOTO_TIER_ONE(frame->instr_ptr); + } + tier2 op(_CHECK_VALIDITY, (--)) { DEOPT_IF(!current_executor->vm_data.valid); } @@ -5382,6 +5408,21 @@ dummy_func( } } + tier2 op(_START_DYNAMIC_EXECUTOR, (executor/4 --)) { +#ifndef _Py_JIT + assert(current_executor == (_PyExecutorObject*)executor); +#endif + assert(tstate->jit_exit != NULL || tstate->jit_exit->executor == current_executor); + tstate->current_executor = (PyObject *)executor; + if (!current_executor->vm_data.valid) { + assert(tstate->jit_exit->executor == current_executor); + assert(tstate->current_executor == executor); + _PyExecutor_ClearExit(tstate->jit_exit); + // Note: this points to _DYNAMIC_DEOPT!!! + DEOPT_IF(true); + } + } + tier2 op(_MAKE_WARM, (--)) { current_executor->vm_data.warm = true; } @@ -5445,10 +5486,11 @@ dummy_func( // Note: it's safe to use target->op.arg here instead of the oparg given by EXTENDED_ARG. // The invariant in the optimizer is the deopt target always points back to the first EXTENDED_ARG. // So setting it to anything else is wrong. - int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg); + int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg, false); if (succ) { GOTO_TIER_ONE_CONTINUE_TRACING(target); } + exit->temperature = restart_backoff_counter(exit->temperature); GOTO_TIER_ONE(target); } assert(tstate->jit_exit == exit); @@ -5456,6 +5498,41 @@ dummy_func( TIER2_TO_TIER2(exit->executor); } + tier2 op(_COLD_DYNAMIC_EXIT, ( -- )) { + _PyExitData *exit = tstate->jit_exit; + assert(exit != NULL); + _Py_CODEUNIT *target = frame->instr_ptr; + _Py_BackoffCounter temperature = exit->temperature; + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = _PyFrame_GetCode(frame); + executor = code->co_executors->executors[target->op.arg]; + if (executor->trace[2].opcode == _GUARD_EXECUTOR_IP && executor->vm_data.valid) { + Py_INCREF(executor); + assert(tstate->jit_exit == exit); + exit->executor = executor; + TIER2_TO_TIER2(executor); + } + } + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + GOTO_TIER_ONE(target); + } + _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); + assert(tstate->current_executor == (PyObject *)previous_executor); + int chain_depth = previous_executor->vm_data.chain_depth + 1; + int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg, true); + if (succ) { + GOTO_TIER_ONE_CONTINUE_TRACING(target); + } + exit->temperature = restart_backoff_counter(exit->temperature); + GOTO_TIER_ONE(target); + } + + tier2 op(_GUARD_EXECUTOR_IP, (ip/4 --)) { + EXIT_IF(frame->instr_ptr != (_Py_CODEUNIT*)ip); + } + tier2 op(_GUARD_IP__PUSH_FRAME, (ip/4 --)) { // Implementation automatically inserted by Tools/cases/tier2_generator.py EXIT_IF(true); @@ -5476,33 +5553,6 @@ dummy_func( EXIT_IF(true); } - // Note: this is different than _COLD_EXIT/_EXIT_TRACE, as it may lead to multiple executors - // from a single exit! - tier2 op(_DYNAMIC_EXIT, (exit_p/4 --)) { - _Py_CODEUNIT *target = frame->instr_ptr; -#if defined(Py_DEBUG) && !defined(_Py_JIT) - _PyExitData *exit = (_PyExitData *)exit_p; - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - if (frame->lltrace >= 2) { - printf("DYNAMIC EXIT: [UOp "); - _PyUOpPrint(&next_uop[-1]); - printf(", exit %tu, temp %d, target %d -> %s]\n", - exit - current_executor->exits, exit->temperature.value_and_backoff, - (int)(target - _PyFrame_GetBytecode(frame)), - _PyOpcode_OpName[target->op.code]); - } -#endif - if (target->op.code == ENTER_EXECUTOR) { - PyCodeObject *code = _PyFrame_GetCode(frame); - _PyExecutorObject *executor = code->co_executors->executors[target->op.arg]; - tstate->jit_exit = NULL; - TIER2_TO_TIER2(executor); - } - else { - GOTO_TIER_ONE(target); - } - } - label(pop_2_error) { stack_pointer -= 2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/ceval.c b/Python/ceval.c index 3197eb61b095..0b7781ccfdbd 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1214,7 +1214,10 @@ _PyTier2Interpreter( tier2_start: next_uop = current_executor->trace; - assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); + assert(next_uop->opcode == _START_EXECUTOR || + next_uop->opcode == _COLD_EXIT || + next_uop->opcode == _COLD_DYNAMIC_EXIT || + next_uop->opcode == _START_DYNAMIC_EXECUTOR); #undef LOAD_IP #define LOAD_IP(UNUSED) (void)0 @@ -1238,13 +1241,16 @@ tier2_start: uint64_t trace_uop_execution_counter = 0; #endif - assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); + assert(next_uop->opcode == _START_EXECUTOR || + next_uop->opcode == _COLD_EXIT || + next_uop->opcode == _COLD_DYNAMIC_EXIT || + next_uop->opcode == _START_DYNAMIC_EXECUTOR); tier2_dispatch: for (;;) { uopcode = next_uop->opcode; #ifdef Py_DEBUG if (frame->lltrace >= 3) { - dump_stack(frame, stack_pointer); + // dump_stack(frame, stack_pointer); if (next_uop->opcode == _START_EXECUTOR) { printf("%4d uop: ", 0); } diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 0f7e2175480b..b32f11652bc4 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -401,9 +401,17 @@ do { \ next_instr = frame->instr_ptr + 1; \ JUMP_TO_LABEL(error); \ } \ + /* No progress made */ \ + if (next_instr == this_instr) { \ + opcode = executor->vm_data.opcode; \ + oparg = (oparg & ~255) | executor->vm_data.oparg; \ + if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) { \ + PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); \ + } \ + DISPATCH_GOTO(); \ + } \ if (keep_tracing_bit) { \ - assert(next_instr->op.code != ENTER_EXECUTOR); \ - assert(tstate->interp->jit_state.code_curr_size == 2); \ + assert(tstate->interp->jit_state.code_curr_size == 2 || tstate->interp->jit_state.code_curr_size == 3); \ ENTER_TRACING(); \ DISPATCH_NON_TRACING(); \ } \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1b52a27621af..173380aff00e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -7130,6 +7130,35 @@ break; } + case _DYNAMIC_EXIT: { + PyObject *exit_p = (PyObject *)CURRENT_OPERAND0(); + _PyExitData *exit = (_PyExitData *)exit_p; + #if defined(Py_DEBUG) && !defined(_Py_JIT) + _Py_CODEUNIT *target = frame->instr_ptr; + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + if (frame->lltrace >= 2) { + _PyFrame_SetStackPointer(frame, stack_pointer); + printf("DYNAMIC EXIT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(", exit %tu, temp %d, target %d -> %s]\n", + exit - current_executor->exits, exit->temperature.value_and_backoff, + (int)(target - _PyFrame_GetBytecode(frame)), + _PyOpcode_OpName[target->op.code]); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + #endif + tstate->jit_exit = exit; + assert(exit->is_dynamic); + _PyExecutorObject *exec = exit->executor; + TIER2_TO_TIER2(exec); + break; + } + + case _DYNAMIC_DEOPT: { + GOTO_TIER_ONE(frame->instr_ptr); + break; + } + case _CHECK_VALIDITY: { if (!current_executor->vm_data.valid) { UOP_STAT_INC(uopcode, miss); @@ -7411,6 +7440,27 @@ break; } + case _START_DYNAMIC_EXECUTOR: { + PyObject *executor = (PyObject *)CURRENT_OPERAND0(); + #ifndef _Py_JIT + assert(current_executor == (_PyExecutorObject*)executor); + #endif + assert(tstate->jit_exit != NULL || tstate->jit_exit->executor == current_executor); + tstate->current_executor = (PyObject *)executor; + if (!current_executor->vm_data.valid) { + assert(tstate->jit_exit->executor == current_executor); + assert(tstate->current_executor == executor); + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyExecutor_ClearExit(tstate->jit_exit); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + } + break; + } + case _MAKE_WARM: { current_executor->vm_data.warm = true; break; @@ -7484,10 +7534,11 @@ _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); assert(tstate->current_executor == (PyObject *)previous_executor); int chain_depth = previous_executor->vm_data.chain_depth + 1; - int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg); + int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg, false); if (succ) { GOTO_TIER_ONE_CONTINUE_TRACING(target); } + exit->temperature = restart_backoff_counter(exit->temperature); GOTO_TIER_ONE(target); } assert(tstate->jit_exit == exit); @@ -7496,32 +7547,44 @@ break; } - case _DYNAMIC_EXIT: { - PyObject *exit_p = (PyObject *)CURRENT_OPERAND0(); + case _COLD_DYNAMIC_EXIT: { + _PyExitData *exit = tstate->jit_exit; + assert(exit != NULL); _Py_CODEUNIT *target = frame->instr_ptr; - #if defined(Py_DEBUG) && !defined(_Py_JIT) - _PyExitData *exit = (_PyExitData *)exit_p; - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - if (frame->lltrace >= 2) { - _PyFrame_SetStackPointer(frame, stack_pointer); - printf("DYNAMIC EXIT: [UOp "); - _PyUOpPrint(&next_uop[-1]); - printf(", exit %tu, temp %d, target %d -> %s]\n", - exit - current_executor->exits, exit->temperature.value_and_backoff, - (int)(target - _PyFrame_GetBytecode(frame)), - _PyOpcode_OpName[target->op.code]); - stack_pointer = _PyFrame_GetStackPointer(frame); - } - #endif + _Py_BackoffCounter temperature = exit->temperature; + _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { PyCodeObject *code = _PyFrame_GetCode(frame); - _PyExecutorObject *executor = code->co_executors->executors[target->op.arg]; - tstate->jit_exit = NULL; - TIER2_TO_TIER2(executor); + executor = code->co_executors->executors[target->op.arg]; + if (executor->trace[2].opcode == _GUARD_EXECUTOR_IP && executor->vm_data.valid) { + Py_INCREF(executor); + assert(tstate->jit_exit == exit); + exit->executor = executor; + TIER2_TO_TIER2(executor); + } } - else { + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); GOTO_TIER_ONE(target); } + _PyExecutorObject *previous_executor = _PyExecutor_FromExit(exit); + assert(tstate->current_executor == (PyObject *)previous_executor); + int chain_depth = previous_executor->vm_data.chain_depth + 1; + int succ = _PyJit_TryInitializeTracing(tstate, frame, target, target, target, STACK_LEVEL(), chain_depth, exit, previous_executor, target->op.arg, true); + if (succ) { + GOTO_TIER_ONE_CONTINUE_TRACING(target); + } + exit->temperature = restart_backoff_counter(exit->temperature); + GOTO_TIER_ONE(target); + break; + } + + case _GUARD_EXECUTOR_IP: { + PyObject *ip = (PyObject *)CURRENT_OPERAND0(); + if (frame->instr_ptr != (_Py_CODEUNIT*)ip) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ff5b47496e11..b5d95fd19ae4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7716,7 +7716,7 @@ oparg >>= 8; insert_exec_at--; } - int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, NULL, oparg); + int succ = _PyJit_TryInitializeTracing(tstate, frame, this_instr, insert_exec_at, next_instr, STACK_LEVEL(), 0, NULL, NULL, oparg, false); if (succ) { ENTER_TRACING(); } diff --git a/Python/optimizer.c b/Python/optimizer.c index f0ea40b44ef1..1035f310c984 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -124,6 +124,10 @@ _PyOptimizer_Optimize( assert(!interp->compiling); assert(tstate->interp->jit_state.initial_stack_depth >= 0); #ifndef Py_GIL_DISABLED + // Trace got stomped on by another thread. + if (tstate->interp->jit_state.initial_func == NULL) { + return 0; + } interp->compiling = true; // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // make progress in order to avoid infinite loops or excessively-long @@ -937,13 +941,20 @@ full: // Returns 0 for do not enter tracing, 1 on enter tracing. int -_PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *curr_instr, _Py_CODEUNIT *insert_exec_instr, _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth, _PyExitData *exit, _PyExecutorObject *prev_exec, int oparg) +_PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *curr_instr, _Py_CODEUNIT *insert_exec_instr, _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth, _PyExitData *exit, _PyExecutorObject *prev_exec, int oparg, bool is_dynamic_target) { // A recursive trace. // Don't trace into the inner call because it will stomp on the previous trace, causing endless retraces. if (tstate->interp->jit_state.code_curr_size > 2) { return 0; } + if (oparg > 0xFFFF) { + return 0; + } + // Dynamic exits with progress is wonky. + if (is_dynamic_target && chain_depth >= MAX_CHAIN_DEPTH) { + return 0; + } PyCodeObject *code = _PyFrame_GetCode(frame); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); @@ -959,9 +970,18 @@ _PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, _ 2 * INSTR_IP(close_loop_instr, code), chain_depth); #endif - add_to_trace(tstate->interp->jit_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)insert_exec_instr, INSTR_IP(insert_exec_instr, code)); - add_to_trace(tstate->interp->jit_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0); - tstate->interp->jit_state.code_curr_size = 2; + + if (is_dynamic_target) { + add_to_trace(tstate->interp->jit_state.code_buffer, 0, _START_DYNAMIC_EXECUTOR, 0, (uintptr_t)insert_exec_instr, INSTR_IP(insert_exec_instr, code)); + add_to_trace(tstate->interp->jit_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0); + add_to_trace(tstate->interp->jit_state.code_buffer, 2, _GUARD_EXECUTOR_IP, 0, (uintptr_t)curr_instr, 0); + tstate->interp->jit_state.code_curr_size = 3; + } + else { + add_to_trace(tstate->interp->jit_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)insert_exec_instr, INSTR_IP(insert_exec_instr, code)); + add_to_trace(tstate->interp->jit_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0); + tstate->interp->jit_state.code_curr_size = 2; + } tstate->interp->jit_state.code_max_size = UOP_MAX_TRACE_LENGTH; tstate->interp->jit_state.insert_exec_instr = insert_exec_instr; tstate->interp->jit_state.close_loop_instr = close_loop_instr; @@ -1081,11 +1101,15 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length) opcode == _GUARD_IP__PUSH_FRAME || opcode == _GUARD_IP_RETURN_VALUE || opcode == _GUARD_IP_YIELD_VALUE || - opcode == _GUARD_IP_RETURN_GENERATOR + opcode == _GUARD_IP_RETURN_GENERATOR || + opcode == _GUARD_EXECUTOR_IP ) { exit_op = _DYNAMIC_EXIT; unique_target = true; } + if (opcode == _START_DYNAMIC_EXECUTOR) { + exit_op = _DYNAMIC_DEOPT; + } if (unique_target || jump_target != current_jump_target || current_exit_op != exit_op) { make_exit(&buffer[next_spare], exit_op, jump_target); current_exit_op = exit_op; @@ -1114,7 +1138,7 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length) } } if (opcode == _JUMP_TO_TOP) { - assert(buffer[0].opcode == _START_EXECUTOR); + assert(buffer[0].opcode == _START_EXECUTOR || buffer[0].opcode == _START_DYNAMIC_EXECUTOR); buffer[i].format = UOP_FORMAT_JUMP; buffer[i].jump_target = 1; } @@ -1161,7 +1185,10 @@ sanity_check(_PyExecutorObject *executor) } bool ended = false; uint32_t i = 0; - CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT); + CHECK(executor->trace[0].opcode == _START_EXECUTOR || + executor->trace[0].opcode == _COLD_EXIT || + executor->trace[0].opcode == _COLD_DYNAMIC_EXIT || + executor->trace[0].opcode == _START_DYNAMIC_EXECUTOR); for (; i < executor->code_size; i++) { const _PyUOpInstruction *inst = &executor->trace[i]; uint16_t opcode = inst->opcode; @@ -1194,7 +1221,8 @@ sanity_check(_PyExecutorObject *executor) opcode == _HANDLE_PENDING_AND_DEOPT || opcode == _EXIT_TRACE || opcode == _ERROR_POP_N || - opcode == _DYNAMIC_EXIT); + opcode == _DYNAMIC_EXIT || + opcode == _DYNAMIC_DEOPT); } } @@ -1217,15 +1245,15 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil /* Initialize exits */ _PyExecutorObject *cold = _PyExecutor_GetColdExecutor(); + _PyExecutorObject *cold_dynamic = _PyExecutor_GetColdDynamicExecutor(); cold->vm_data.chain_depth = chain_depth; for (int i = 0; i < exit_count; i++) { executor->exits[i].index = i; executor->exits[i].temperature = initial_temperature_backoff_counter(); - executor->exits[i].executor = cold; } int next_exit = exit_count-1; _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length]; - assert(buffer[0].opcode == _START_EXECUTOR); + assert(buffer[0].opcode == _START_EXECUTOR || buffer[0].opcode == _START_DYNAMIC_EXECUTOR); buffer[0].operand0 = (uint64_t)executor; for (int i = length-1; i >= 0; i--) { int opcode = buffer[i].opcode; @@ -1235,12 +1263,14 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil _PyExitData *exit = &executor->exits[next_exit]; exit->target = buffer[i].target; dest->operand0 = (uint64_t)exit; + exit->executor = opcode == _EXIT_TRACE ? cold : cold_dynamic; + exit->is_dynamic = (char)(opcode == _DYNAMIC_EXIT); next_exit--; } } assert(next_exit == -1); assert(dest == executor->trace); - assert(dest->opcode == _START_EXECUTOR); + assert(dest->opcode == _START_EXECUTOR || dest->opcode == _START_DYNAMIC_EXECUTOR); _Py_ExecutorInit(executor, dependencies); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); @@ -1547,6 +1577,34 @@ _PyExecutor_GetColdExecutor(void) return cold; } +_PyExecutorObject * +_PyExecutor_GetColdDynamicExecutor(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->cold_dynamic_executor != NULL) { + return interp->cold_dynamic_executor; + } + _PyExecutorObject *cold = allocate_executor(0, 1); + if (cold == NULL) { + Py_FatalError("Cannot allocate core JIT code"); + } + ((_PyUOpInstruction *)cold->trace)->opcode = _COLD_DYNAMIC_EXIT; +#ifdef _Py_JIT + cold->jit_code = NULL; + cold->jit_size = 0; + // This is initialized to true so we can prevent the executor + // from being immediately detected as cold and invalidated. + cold->vm_data.warm = true; + if (_PyJIT_Compile(cold, cold->trace, 1)) { + Py_DECREF(cold); + Py_FatalError("Cannot allocate core JIT code"); + } +#endif + _Py_SetImmortal((PyObject *)cold); + interp->cold_dynamic_executor = cold; + return cold; +} + void _PyExecutor_ClearExit(_PyExitData *exit) { @@ -1554,7 +1612,12 @@ _PyExecutor_ClearExit(_PyExitData *exit) return; } _PyExecutorObject *old = exit->executor; - exit->executor = _PyExecutor_GetColdExecutor(); + if (exit->is_dynamic) { + exit->executor = _PyExecutor_GetColdDynamicExecutor(); + } + else { + exit->executor = _PyExecutor_GetColdExecutor(); + } Py_DECREF(old); } @@ -1808,6 +1871,7 @@ executor_to_gv(_PyExecutorObject *executor, FILE *out) /* Write all the outgoing edges */ _PyExecutorObject *cold = _PyExecutor_GetColdExecutor(); + _PyExecutorObject *cold_dynamic = _PyExecutor_GetColdDynamicExecutor(); for (uint32_t i = 0; i < executor->code_size; i++) { _PyUOpInstruction const *inst = &executor->trace[i]; uint16_t flags = _PyUop_Flags[inst->opcode]; @@ -1821,7 +1885,7 @@ executor_to_gv(_PyExecutorObject *executor, FILE *out) assert(exit_inst->opcode == _EXIT_TRACE || exit_inst->opcode == _DYNAMIC_EXIT); exit = (_PyExitData *)exit_inst->operand0; } - if (exit != NULL && exit->executor != cold) { + if (exit != NULL && exit->executor != cold && exit->executor != cold_dynamic) { fprintf(out, "executor_%p:i%d -> executor_%p:start\n", executor, i, exit->executor); } if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) { diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index c576f3b716bc..e1a6f29f47d1 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -448,6 +448,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = _NOP; } break; + case _EXIT_TRACE: default: { // Cancel out pushes and pops, repeatedly. So: @@ -481,7 +482,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } /* _PUSH_FRAME doesn't escape or error, but it * does need the IP for the return address */ - bool needs_ip = (opcode == _PUSH_FRAME || opcode == _YIELD_VALUE || opcode == _DYNAMIC_EXIT); + bool needs_ip = (opcode == _PUSH_FRAME || opcode == _YIELD_VALUE || opcode == _DYNAMIC_EXIT || opcode == _EXIT_TRACE); if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { needs_ip = true; may_have_escaped = true; @@ -491,10 +492,12 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[last_set_ip].opcode = _SET_IP; last_set_ip = -1; } + if (opcode == _EXIT_TRACE) { + return pc + 1; + } break; } case _JUMP_TO_TOP: - case _EXIT_TRACE: case _DYNAMIC_EXIT: return pc + 1; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 40bf95d3081b..50e5287c91aa 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -3274,6 +3274,14 @@ break; } + case _DYNAMIC_EXIT: { + break; + } + + case _DYNAMIC_DEOPT: { + break; + } + case _CHECK_VALIDITY: { break; } @@ -3399,6 +3407,10 @@ break; } + case _START_DYNAMIC_EXECUTOR: { + break; + } + case _MAKE_WARM: { break; } @@ -3427,6 +3439,14 @@ break; } + case _COLD_DYNAMIC_EXIT: { + break; + } + + case _GUARD_EXECUTOR_IP: { + break; + } + case _GUARD_IP__PUSH_FRAME: { break; } @@ -3443,7 +3463,3 @@ break; } - case _DYNAMIC_EXIT: { - break; - } - diff --git a/Python/pystate.c b/Python/pystate.c index 45b7d1f3ea6e..0676a86965cf 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -827,6 +827,14 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) assert(cold->vm_data.warm); _PyExecutor_Free(cold); } + + struct _PyExecutorObject *cold_dynamic = interp->cold_dynamic_executor; + if (cold_dynamic != NULL) { + interp->cold_dynamic_executor = NULL; + assert(cold_dynamic->vm_data.valid); + assert(cold_dynamic->vm_data.warm); + _PyExecutor_Free(cold_dynamic); + } /* We don't clear sysdict and builtins until the end of this function. Because clearing other attributes can execute arbitrary Python code which requires sysdict and builtins. */