git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-112354: Initial implementation of warm up on exits and trace-stitching (GH-114142)
author: Mark Shannon <mark@hotpy.org>
Tue, 20 Feb 2024 09:39:55 +0000 (09:39 +0000)
committer: GitHub <noreply@github.com>
Tue, 20 Feb 2024 09:39:55 +0000 (09:39 +0000)
29 files changed:
.gitattributes
Include/cpython/optimizer.h
Include/cpython/pystate.h
Include/internal/pycore_interp.h
Include/internal/pycore_jit.h
Include/internal/pycore_opcode_metadata.h
Include/internal/pycore_uop_ids.h
Include/internal/pycore_uop_metadata.h
Lib/test/test_frame.py
Lib/test/test_generated_cases.py
Modules/_testinternalcapi.c
Python/bytecodes.c
Python/ceval.c
Python/ceval_macros.h
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/jit.c
Python/optimizer.c
Python/pylifecycle.c
Python/pystate.c
Python/tier2_engine.md [new file with mode: 0644]
Python/tier2_redundancy_eliminator_cases.c.h
Tools/c-analyzer/cpython/_parser.py
Tools/c-analyzer/cpython/ignored.tsv
Tools/cases_generator/analyzer.py
Tools/cases_generator/generators_common.py
Tools/cases_generator/opcode_metadata_generator.py
Tools/cases_generator/tier2_generator.py
Tools/jit/template.c

index c984797e1ac7c6095eed3f22deae946f9ac5c50e..159cd83ff7407d68cb75171428cdee707ee3cf76 100644 (file)
@@ -77,6 +77,7 @@ Include/internal/pycore_opcode.h                    generated
 Include/internal/pycore_opcode_metadata.h           generated
 Include/internal/pycore_*_generated.h               generated
 Include/internal/pycore_uop_ids.h                   generated
+Include/internal/pycore_uop_metadata.h              generated
 Include/opcode.h                                    generated
 Include/opcode_ids.h                                generated
 Include/token.h                                     generated
index f710ca76b2ba24afcad328a5c0d8db11fbf856b8..fe54d1ddfe61293d50dd387e0a87f272ca22499a 100644 (file)
@@ -33,16 +33,28 @@ typedef struct {
 typedef struct {
     uint16_t opcode;
     uint16_t oparg;
-    uint32_t target;
+    union {
+        uint32_t target;
+        uint32_t exit_index;
+    };
     uint64_t operand;  // A cache entry
 } _PyUOpInstruction;
 
+typedef struct _exit_data {
+    uint32_t target;
+    int16_t temperature;
+    const struct _PyExecutorObject *executor;
+} _PyExitData;
+
 typedef struct _PyExecutorObject {
     PyObject_VAR_HEAD
+    const _PyUOpInstruction *trace;
     _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
-    void *jit_code;
+    uint32_t exit_count;
+    uint32_t code_size;
     size_t jit_size;
-    _PyUOpInstruction trace[1];
+    void *jit_code;
+    _PyExitData exits[1];
 } _PyExecutorObject;
 
 typedef struct _PyOptimizerObject _PyOptimizerObject;
@@ -59,6 +71,7 @@ typedef struct _PyOptimizerObject {
     /* These thresholds are treated as signed so do not exceed INT16_MAX
      * Use INT16_MAX to indicate that the optimizer should never be called */
     uint16_t resume_threshold;
+    uint16_t side_threshold;
     uint16_t backedge_threshold;
     /* Data needed by the optimizer goes here, but is opaque to the VM */
 } _PyOptimizerObject;
@@ -73,16 +86,16 @@ PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *in
 
 _PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer);
 
-PyAPI_FUNC(void) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);
+PyAPI_FUNC(int) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer);
 
 PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
 
 PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);
 
 int
-_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer);
+_PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr);
 
-void _Py_ExecutorInit(_PyExecutorObject *, _PyBloomFilter *);
+void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
 void _Py_ExecutorClear(_PyExecutorObject *);
 void _Py_BloomFilter_Init(_PyBloomFilter *);
 void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
index 9bc8758e72bd8f2a0caf817d3bc417822b5d2532..b99450a8a8d0935e8ad18f831d4e488783176060 100644 (file)
@@ -212,6 +212,8 @@ struct _ts {
     /* The thread's exception stack entry.  (Always the last entry.) */
     _PyErr_StackItem exc_state;
 
+    PyObject *previous_executor;
+
 };
 
 #ifdef Py_DEBUG
index 567d6a9bd510ab4f63558ed7c2fddbc99e839ce7..06eba665c80e930ed4dd13450e2eb4cf9ca253cf 100644 (file)
@@ -237,10 +237,13 @@ struct _is {
     struct callable_cache callable_cache;
     _PyOptimizerObject *optimizer;
     _PyExecutorObject *executor_list_head;
-    /* These values are shifted and offset to speed up check in JUMP_BACKWARD */
+
+    /* These two values are shifted and offset to speed up check in JUMP_BACKWARD */
     uint32_t optimizer_resume_threshold;
     uint32_t optimizer_backedge_threshold;
 
+    uint16_t optimizer_side_threshold;
+
     uint32_t next_func_version;
     _rare_events rare_events;
     PyDict_WatchCallback builtins_dict_watcher;
index 0b71eb6f758ac6220414711829f0215a681f0889..17bd23f0752be20f2600a434fd33b36062969839 100644 (file)
@@ -13,7 +13,7 @@ extern "C" {
 
 typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate);
 
-int _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length);
+int _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length);
 void _PyJIT_Free(_PyExecutorObject *executor);
 
 #endif  // _Py_JIT
index 6b60a6fbffdc5e45fb9847697b3f1cffc7af7178..177dd302f73171a5cbea9b1662d91316af83ea8e 100644 (file)
@@ -909,8 +909,9 @@ enum InstructionFormat {
 #define HAS_DEOPT_FLAG (128)
 #define HAS_ERROR_FLAG (256)
 #define HAS_ESCAPES_FLAG (512)
-#define HAS_PURE_FLAG (1024)
-#define HAS_PASSTHROUGH_FLAG (2048)
+#define HAS_EXIT_FLAG (1024)
+#define HAS_PURE_FLAG (2048)
+#define HAS_PASSTHROUGH_FLAG (4096)
 #define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG))
 #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG))
 #define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG))
@@ -921,6 +922,7 @@ enum InstructionFormat {
 #define OPCODE_HAS_DEOPT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_DEOPT_FLAG))
 #define OPCODE_HAS_ERROR(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_FLAG))
 #define OPCODE_HAS_ESCAPES(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ESCAPES_FLAG))
+#define OPCODE_HAS_EXIT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_EXIT_FLAG))
 #define OPCODE_HAS_PURE(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PURE_FLAG))
 #define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG))
 
@@ -945,14 +947,14 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
     [BEFORE_ASYNC_WITH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BEFORE_WITH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
-    [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
-    [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG },
-    [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
-    [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
-    [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
-    [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG },
-    [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG },
-    [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG },
+    [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
+    [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
+    [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
+    [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
+    [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG },
     [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
@@ -1060,16 +1062,16 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
     [LOAD_ATTR] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
     [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
-    [LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
-    [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
-    [LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
-    [LOAD_ATTR_METHOD_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
+    [LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
+    [LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
+    [LOAD_ATTR_METHOD_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
     [LOAD_ATTR_MODULE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
-    [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
-    [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
+    [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
     [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
-    [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG },
-    [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
+    [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
     [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG },
     [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
@@ -1118,8 +1120,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
     [SET_FUNCTION_ATTRIBUTE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG },
     [SET_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [STORE_ATTR] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
-    [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
-    [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
+    [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
+    [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
     [STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
     [STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG },
     [STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG },
@@ -1133,12 +1135,12 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = {
     [STORE_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG },
     [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG },
     [TO_BOOL] = { true, INSTR_FMT_IXC00, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
-    [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
-    [TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
-    [TO_BOOL_INT] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
-    [TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
-    [TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
-    [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG },
+    [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [TO_BOOL_INT] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
+    [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
     [UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [UNARY_NEGATIVE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG },
index 9bb537d355055dbc1539fa910595dfe50e091028..ed800a73796da052fbf2d75a295aacb67d10aaf7 100644 (file)
@@ -139,7 +139,6 @@ extern "C" {
 #define _CONTAINS_OP CONTAINS_OP
 #define _CHECK_EG_MATCH CHECK_EG_MATCH
 #define _CHECK_EXC_MATCH CHECK_EXC_MATCH
-#define _JUMP_BACKWARD JUMP_BACKWARD
 #define _POP_JUMP_IF_FALSE 339
 #define _POP_JUMP_IF_TRUE 340
 #define _IS_NONE 341
@@ -237,8 +236,11 @@ extern "C" {
 #define _CHECK_GLOBALS 384
 #define _CHECK_BUILTINS 385
 #define _INTERNAL_INCREMENT_OPT_COUNTER 386
-#define _CHECK_VALIDITY_AND_SET_IP 387
-#define MAX_UOP_ID 387
+#define _COLD_EXIT 387
+#define _START_EXECUTOR 388
+#define _FATAL_ERROR 389
+#define _CHECK_VALIDITY_AND_SET_IP 390
+#define MAX_UOP_ID 390
 
 #ifdef __cplusplus
 }
index 163a0320aa22985fce42c4ef23d8a4f5c55811c7..1e2dfecd9cf8afeb89c3ee7b9882148c96f09957 100644 (file)
@@ -32,22 +32,22 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_UNARY_NOT] = HAS_PURE_FLAG,
     [_TO_BOOL] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
-    [_TO_BOOL_BOOL] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
-    [_TO_BOOL_INT] = HAS_DEOPT_FLAG,
-    [_TO_BOOL_LIST] = HAS_DEOPT_FLAG,
-    [_TO_BOOL_NONE] = HAS_DEOPT_FLAG,
-    [_TO_BOOL_STR] = HAS_DEOPT_FLAG,
-    [_TO_BOOL_ALWAYS_TRUE] = HAS_DEOPT_FLAG,
+    [_TO_BOOL_BOOL] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
+    [_TO_BOOL_INT] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
+    [_TO_BOOL_LIST] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
+    [_TO_BOOL_NONE] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
+    [_TO_BOOL_STR] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
+    [_TO_BOOL_ALWAYS_TRUE] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
     [_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
-    [_GUARD_BOTH_INT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
+    [_GUARD_BOTH_INT] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
     [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG,
-    [_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
+    [_GUARD_BOTH_FLOAT] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
     [_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG,
     [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG,
     [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG,
-    [_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
+    [_GUARD_BOTH_UNICODE] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
     [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
     [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
@@ -112,7 +112,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_LOAD_SUPER_ATTR_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_LOAD_SUPER_ATTR_METHOD] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_LOAD_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
-    [_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
+    [_GUARD_TYPE_VERSION] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_PASSTHROUGH_FLAG,
     [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
     [_LOAD_ATTR_INSTANCE_VALUE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
     [_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG,
@@ -193,14 +193,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG,
     [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG,
     [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG,
-    [_GUARD_IS_TRUE_POP] = HAS_DEOPT_FLAG,
-    [_GUARD_IS_FALSE_POP] = HAS_DEOPT_FLAG,
-    [_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG,
-    [_GUARD_IS_NOT_NONE_POP] = HAS_DEOPT_FLAG,
+    [_GUARD_IS_TRUE_POP] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
+    [_GUARD_IS_FALSE_POP] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
+    [_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
+    [_GUARD_IS_NOT_NONE_POP] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
     [_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG,
     [_SET_IP] = 0,
     [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG,
-    [_EXIT_TRACE] = HAS_DEOPT_FLAG,
+    [_EXIT_TRACE] = HAS_DEOPT_FLAG | HAS_EXIT_FLAG,
     [_CHECK_VALIDITY] = HAS_DEOPT_FLAG,
     [_LOAD_CONST_INLINE] = HAS_PURE_FLAG,
     [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG,
@@ -209,6 +209,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_CHECK_GLOBALS] = HAS_DEOPT_FLAG,
     [_CHECK_BUILTINS] = HAS_DEOPT_FLAG,
     [_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
+    [_COLD_EXIT] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
+    [_START_EXECUTOR] = 0,
+    [_FATAL_ERROR] = HAS_ESCAPES_FLAG,
     [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG,
 };
 
@@ -266,6 +269,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE",
     [_CHECK_VALIDITY] = "_CHECK_VALIDITY",
     [_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP",
+    [_COLD_EXIT] = "_COLD_EXIT",
     [_COMPARE_OP] = "_COMPARE_OP",
     [_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT",
     [_COMPARE_OP_INT] = "_COMPARE_OP_INT",
@@ -285,6 +289,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_END_SEND] = "_END_SEND",
     [_EXIT_INIT_CHECK] = "_EXIT_INIT_CHECK",
     [_EXIT_TRACE] = "_EXIT_TRACE",
+    [_FATAL_ERROR] = "_FATAL_ERROR",
     [_FORMAT_SIMPLE] = "_FORMAT_SIMPLE",
     [_FORMAT_WITH_SPEC] = "_FORMAT_WITH_SPEC",
     [_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO",
@@ -377,6 +382,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_SET_FUNCTION_ATTRIBUTE] = "_SET_FUNCTION_ATTRIBUTE",
     [_SET_IP] = "_SET_IP",
     [_SET_UPDATE] = "_SET_UPDATE",
+    [_START_EXECUTOR] = "_START_EXECUTOR",
     [_STORE_ATTR] = "_STORE_ATTR",
     [_STORE_ATTR_INSTANCE_VALUE] = "_STORE_ATTR_INSTANCE_VALUE",
     [_STORE_ATTR_SLOT] = "_STORE_ATTR_SLOT",
index f88206de550da08acc07403b3852f05c9f66830c..f8812c281c2deba89afb12acef86051bfd0ecdd7 100644 (file)
@@ -331,6 +331,7 @@ class TestIncompleteFrameAreInvisible(unittest.TestCase):
             # on the *very next* allocation:
             gc.collect()
             gc.set_threshold(1, 0, 0)
+            sys._clear_internal_caches()
             # Okay, so here's the nightmare scenario:
             # - We're tracing the resumption of a generator, which creates a new
             #   frame object.
index a7ad6c7320b4ee48e11bb7e97a6534a88922b779..0d2ccd558d436d58b0832ec9bb9c9d903e452633 100644 (file)
@@ -794,6 +794,17 @@ class TestGeneratedCases(unittest.TestCase):
         self.run_cases_test(input, output)
 
 
+    def test_deopt_and_exit(self):
+        input = """
+        pure op(OP, (arg1 -- out)) {
+            DEOPT_IF(1);
+            EXIT_IF(1);
+        }
+        """
+        output = ""
+        with self.assertRaises(Exception):
+            self.run_cases_test(input, output)
+
 class TestGeneratedAbstractCases(unittest.TestCase):
     def setUp(self) -> None:
         super().setUp()
index 3834f00009cea4eae2ca83be08c7238b3af8044a..bcc431a27001f257f6e66900ba180840d17f0915 100644 (file)
@@ -977,7 +977,9 @@ set_optimizer(PyObject *self, PyObject *opt)
     if (opt == Py_None) {
         opt = NULL;
     }
-    PyUnstable_SetOptimizer((_PyOptimizerObject*)opt);
+    if (PyUnstable_SetOptimizer((_PyOptimizerObject*)opt) < 0) {
+        return NULL;
+    }
     Py_RETURN_NONE;
 }
 
index 6822e772e913e8de89d41d7616122aa945ae283f..2e0008e63f6e0c634138d84acc3e451afc00ad3b 100644 (file)
@@ -340,12 +340,12 @@ dummy_func(
         macro(TO_BOOL) = _SPECIALIZE_TO_BOOL + unused/2 + _TO_BOOL;
 
         inst(TO_BOOL_BOOL, (unused/1, unused/2, value -- value)) {
-            DEOPT_IF(!PyBool_Check(value));
+            EXIT_IF(!PyBool_Check(value));
             STAT_INC(TO_BOOL, hit);
         }
 
         inst(TO_BOOL_INT, (unused/1, unused/2, value -- res)) {
-            DEOPT_IF(!PyLong_CheckExact(value));
+            EXIT_IF(!PyLong_CheckExact(value));
             STAT_INC(TO_BOOL, hit);
             if (_PyLong_IsZero((PyLongObject *)value)) {
                 assert(_Py_IsImmortal(value));
@@ -358,7 +358,7 @@ dummy_func(
         }
 
         inst(TO_BOOL_LIST, (unused/1, unused/2, value -- res)) {
-            DEOPT_IF(!PyList_CheckExact(value));
+            EXIT_IF(!PyList_CheckExact(value));
             STAT_INC(TO_BOOL, hit);
             res = Py_SIZE(value) ? Py_True : Py_False;
             DECREF_INPUTS();
@@ -366,13 +366,13 @@ dummy_func(
 
         inst(TO_BOOL_NONE, (unused/1, unused/2, value -- res)) {
             // This one is a bit weird, because we expect *some* failures:
-            DEOPT_IF(!Py_IsNone(value));
+            EXIT_IF(!Py_IsNone(value));
             STAT_INC(TO_BOOL, hit);
             res = Py_False;
         }
 
         inst(TO_BOOL_STR, (unused/1, unused/2, value -- res)) {
-            DEOPT_IF(!PyUnicode_CheckExact(value));
+            EXIT_IF(!PyUnicode_CheckExact(value));
             STAT_INC(TO_BOOL, hit);
             if (value == &_Py_STR(empty)) {
                 assert(_Py_IsImmortal(value));
@@ -388,7 +388,7 @@ dummy_func(
         inst(TO_BOOL_ALWAYS_TRUE, (unused/1, version/2, value -- res)) {
             // This one is a bit weird, because we expect *some* failures:
             assert(version);
-            DEOPT_IF(Py_TYPE(value)->tp_version_tag != version);
+            EXIT_IF(Py_TYPE(value)->tp_version_tag != version);
             STAT_INC(TO_BOOL, hit);
             DECREF_INPUTS();
             res = Py_True;
@@ -412,8 +412,8 @@ dummy_func(
         };
 
         op(_GUARD_BOTH_INT, (left, right -- left, right)) {
-            DEOPT_IF(!PyLong_CheckExact(left));
-            DEOPT_IF(!PyLong_CheckExact(right));
+            EXIT_IF(!PyLong_CheckExact(left));
+            EXIT_IF(!PyLong_CheckExact(right));
         }
 
         pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) {
@@ -448,8 +448,8 @@ dummy_func(
             _GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT;
 
         op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
-            DEOPT_IF(!PyFloat_CheckExact(left));
-            DEOPT_IF(!PyFloat_CheckExact(right));
+            EXIT_IF(!PyFloat_CheckExact(left));
+            EXIT_IF(!PyFloat_CheckExact(right));
         }
 
         pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) {
@@ -484,8 +484,8 @@ dummy_func(
             _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT;
 
         op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) {
-            DEOPT_IF(!PyUnicode_CheckExact(left));
-            DEOPT_IF(!PyUnicode_CheckExact(right));
+            EXIT_IF(!PyUnicode_CheckExact(left));
+            EXIT_IF(!PyUnicode_CheckExact(right));
         }
 
         pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) {
@@ -1904,7 +1904,7 @@ dummy_func(
         op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
             PyTypeObject *tp = Py_TYPE(owner);
             assert(type_version != 0);
-            DEOPT_IF(tp->tp_version_tag != type_version);
+            EXIT_IF(tp->tp_version_tag != type_version);
         }
 
         op(_CHECK_MANAGED_OBJECT_HAS_VALUES, (owner -- owner)) {
@@ -2314,6 +2314,7 @@ dummy_func(
         }
 
         inst(JUMP_BACKWARD, (unused/1 --)) {
+            TIER_ONE_ONLY
             CHECK_EVAL_BREAKER();
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
@@ -2335,13 +2336,13 @@ dummy_func(
                     oparg >>= 8;
                     start--;
                 }
-                int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
+                _PyExecutorObject *executor;
+                int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor);
                 ERROR_IF(optimized < 0, error);
                 if (optimized) {
-                    // Rewind and enter the executor:
-                    assert(start->op.code == ENTER_EXECUTOR);
-                    next_instr = start;
-                    this_instr[1].cache &= OPTIMIZER_BITS_MASK;
+                    assert(tstate->previous_executor == NULL);
+                    tstate->previous_executor = Py_None;
+                    GOTO_TIER_TWO(executor);
                 }
                 else {
                     int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
@@ -2371,14 +2372,15 @@ dummy_func(
         inst(ENTER_EXECUTOR, (--)) {
             TIER_ONE_ONLY
             CHECK_EVAL_BREAKER();
-
             PyCodeObject *code = _PyFrame_GetCode(frame);
-            current_executor = code->co_executors->executors[oparg & 255];
-            assert(current_executor->vm_data.index == INSTR_OFFSET() - 1);
-            assert(current_executor->vm_data.code == code);
-            assert(current_executor->vm_data.valid);
-            Py_INCREF(current_executor);
-            GOTO_TIER_TWO();
+            _PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
+            assert(executor->vm_data.index == INSTR_OFFSET() - 1);
+            assert(executor->vm_data.code == code);
+            assert(executor->vm_data.valid);
+            assert(tstate->previous_executor == NULL);
+            tstate->previous_executor = Py_None;
+            Py_INCREF(executor);
+            GOTO_TIER_TWO(executor);
         }
 
         replaced op(_POP_JUMP_IF_FALSE, (cond -- )) {
@@ -3997,26 +3999,26 @@ dummy_func(
         inst(CACHE, (--)) {
             TIER_ONE_ONLY
             assert(0 && "Executing a cache.");
-            Py_UNREACHABLE();
+            Py_FatalError("Executing a cache.");
         }
 
         inst(RESERVED, (--)) {
             TIER_ONE_ONLY
             assert(0 && "Executing RESERVED instruction.");
-            Py_UNREACHABLE();
+            Py_FatalError("Executing RESERVED instruction.");
         }
 
         ///////// Tier-2 only opcodes /////////
 
         op (_GUARD_IS_TRUE_POP, (flag -- )) {
             SYNC_SP();
-            DEOPT_IF(!Py_IsTrue(flag));
+            EXIT_IF(!Py_IsTrue(flag));
             assert(Py_IsTrue(flag));
         }
 
         op (_GUARD_IS_FALSE_POP, (flag -- )) {
             SYNC_SP();
-            DEOPT_IF(!Py_IsFalse(flag));
+            EXIT_IF(!Py_IsFalse(flag));
             assert(Py_IsFalse(flag));
         }
 
@@ -4024,18 +4026,20 @@ dummy_func(
             SYNC_SP();
             if (!Py_IsNone(val)) {
                 Py_DECREF(val);
-                DEOPT_IF(1);
+                EXIT_IF(1);
             }
         }
 
         op (_GUARD_IS_NOT_NONE_POP, (val -- )) {
             SYNC_SP();
-            DEOPT_IF(Py_IsNone(val));
+            EXIT_IF(Py_IsNone(val));
             Py_DECREF(val);
         }
 
         op(_JUMP_TO_TOP, (--)) {
-            next_uop = current_executor->trace;
+#ifndef _Py_JIT
+            next_uop = &current_executor->trace[1];
+#endif
             CHECK_EVAL_BREAKER();
         }
 
@@ -4055,7 +4059,7 @@ dummy_func(
 
         op(_EXIT_TRACE, (--)) {
             TIER_TWO_ONLY
-            DEOPT_IF(1);
+            EXIT_IF(1);
         }
 
         op(_CHECK_VALIDITY, (--)) {
@@ -4101,6 +4105,58 @@ dummy_func(
             exe->count++;
         }
 
+        /* Only used for handling cold side exits, should never appear in
+         * a normal trace or as part of an instruction.
+         */
+        op(_COLD_EXIT, (--)) {
+            TIER_TWO_ONLY
+            _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
+            _PyExitData *exit = &previous->exits[oparg];
+            exit->temperature++;
+            PyCodeObject *code = _PyFrame_GetCode(frame);
+            _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
+            if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
+                GOTO_TIER_ONE(target);
+            }
+            _PyExecutorObject *executor;
+            if (target->op.code == ENTER_EXECUTOR) {
+                executor = code->co_executors->executors[target->op.arg];
+                Py_INCREF(executor);
+            } else {
+                int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
+                if (optimized <= 0) {
+                    int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
+                    exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
+                    if (optimized < 0) {
+                        Py_DECREF(previous);
+                        tstate->previous_executor = Py_None;
+                        ERROR_IF(1, error);
+                    }
+                    GOTO_TIER_ONE(target);
+                }
+            }
+            /* We need two references. One to store in exit->executor and
+             * one to keep the executor alive when executing. */
+            Py_INCREF(executor);
+            exit->executor = executor;
+            GOTO_TIER_TWO(executor);
+        }
+
+        op(_START_EXECUTOR, (executor/4 --)) {
+            TIER_TWO_ONLY
+            Py_DECREF(tstate->previous_executor);
+            tstate->previous_executor = NULL;
+#ifndef _Py_JIT
+            current_executor = (_PyExecutorObject*)executor;
+#endif
+        }
+
+        op(_FATAL_ERROR, (--)) {
+            TIER_TWO_ONLY
+            assert(0);
+            Py_FatalError("Fatal error uop executed.");
+        }
+
         op(_CHECK_VALIDITY_AND_SET_IP, (instr_ptr/4 --)) {
             TIER_TWO_ONLY
             DEOPT_IF(!current_executor->vm_data.valid);
index 4f208009086191481c3d615e97c5ce3f5c4827e7..adccf8fc00f69c0f1734a8c64bdccb4e26979df5 100644 (file)
@@ -16,6 +16,7 @@
 #include "pycore_moduleobject.h"  // PyModuleObject
 #include "pycore_object.h"        // _PyObject_GC_TRACK()
 #include "pycore_opcode_metadata.h" // EXTRA_CASES
+#include "pycore_optimizer.h"     // _PyUOpExecutor_Type
 #include "pycore_opcode_utils.h"  // MAKE_FUNCTION_*
 #include "pycore_pyerrors.h"      // _PyErr_GetRaisedException()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
@@ -738,15 +739,16 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         goto resume_with_error;
     }
 
-    /* State shared between Tier 1 and Tier 2 interpreter */
-    _PyExecutorObject *current_executor = NULL;
-
     /* Local "register" variables.
      * These are cached values from the frame and code object.  */
-
     _Py_CODEUNIT *next_instr;
     PyObject **stack_pointer;
 
+#ifndef _Py_JIT
+    /* Tier 2 interpreter state */
+    _PyExecutorObject *current_executor = NULL;
+    const _PyUOpInstruction *next_uop = NULL;
+#endif
 
 start_frame:
     if (_Py_EnterRecursivePy(tstate)) {
@@ -960,18 +962,7 @@ resume_with_error:
 enter_tier_two:
 
 #ifdef _Py_JIT
-
-    ;  // ;)
-    jit_func jitted = current_executor->jit_code;
-    next_instr = jitted(frame, stack_pointer, tstate);
-    frame = tstate->current_frame;
-    Py_DECREF(current_executor);
-    if (next_instr == NULL) {
-        goto resume_with_error;
-    }
-    stack_pointer = _PyFrame_GetStackPointer(frame);
-    DISPATCH();
-
+    assert(0);
 #else
 
 #undef LOAD_IP
@@ -1007,12 +998,12 @@ enter_tier_two:
 #endif
 
     OPT_STAT_INC(traces_executed);
-    _PyUOpInstruction *next_uop = current_executor->trace;
     uint16_t uopcode;
 #ifdef Py_STATS
     uint64_t trace_uop_execution_counter = 0;
 #endif
 
+    assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
     for (;;) {
         uopcode = next_uop->opcode;
         DPRINTF(3,
@@ -1075,23 +1066,39 @@ error_tier_two:
     frame->return_offset = 0;  // Don't leave this random
     _PyFrame_SetStackPointer(frame, stack_pointer);
     Py_DECREF(current_executor);
+    tstate->previous_executor = NULL;
     goto resume_with_error;
 
 // Jump here from DEOPT_IF()
 deoptimize:
     next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame));
-    DPRINTF(2, "DEOPT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d -> %s]\n",
+    DPRINTF(2, "DEOPT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d -> %s]\n",
             uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target,
-            (int)(next_uop - current_executor->trace - 1),
-            _PyOpcode_OpName[frame->instr_ptr->op.code]);
+            _PyOpcode_OpName[next_instr->op.code]);
     OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
     UOP_STAT_INC(uopcode, miss);
     Py_DECREF(current_executor);
+    tstate->previous_executor = NULL;
     DISPATCH();
 
+// Jump here from EXIT_IF()
+side_exit:
+    OPT_HIST(trace_uop_execution_counter, trace_run_length_hist);
+    UOP_STAT_INC(uopcode, miss);
+    uint32_t exit_index = next_uop[-1].exit_index;
+    assert(exit_index < current_executor->exit_count);
+    _PyExitData *exit = &current_executor->exits[exit_index];
+    DPRINTF(2, "SIDE EXIT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", exit %u, temp %d, target %d -> %s]\n",
+            uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, exit_index, exit->temperature,
+            exit->target, _PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]);
+    Py_INCREF(exit->executor);
+    tstate->previous_executor = (PyObject *)current_executor;
+    GOTO_TIER_TWO(exit->executor);
+
 #endif  // _Py_JIT
 
 }
+
 #if defined(__GNUC__)
 #  pragma GCC diagnostic pop
 #elif defined(_MSC_VER) /* MS_WINDOWS */
index 1043966c9a82771224b028e02abc9b46f8e59aa3..f796b60335612cbd0a1f6294240047be083ce10a 100644 (file)
@@ -394,7 +394,36 @@ stack_pointer = _PyFrame_GetStackPointer(frame);
 
 /* Tier-switching macros. */
 
-#define GOTO_TIER_TWO() goto enter_tier_two;
+#ifdef _Py_JIT
+#define GOTO_TIER_TWO(EXECUTOR)                        \
+do {                                                   \
+    jit_func jitted = (EXECUTOR)->jit_code;            \
+    next_instr = jitted(frame, stack_pointer, tstate); \
+    Py_DECREF(tstate->previous_executor);              \
+    tstate->previous_executor = NULL;                  \
+    frame = tstate->current_frame;                     \
+    if (next_instr == NULL) {                          \
+        goto resume_with_error;                        \
+    }                                                  \
+    stack_pointer = _PyFrame_GetStackPointer(frame);   \
+    DISPATCH();                                        \
+} while (0)
+#else
+#define GOTO_TIER_TWO(EXECUTOR) \
+do { \
+    next_uop = (EXECUTOR)->trace; \
+    assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); \
+    goto enter_tier_two; \
+} while (0)
+#endif
+
+#define GOTO_TIER_ONE(TARGET) \
+do { \
+    Py_DECREF(tstate->previous_executor); \
+    tstate->previous_executor = NULL;  \
+    next_instr = target; \
+    DISPATCH(); \
+} while (0)
 
 #define CURRENT_OPARG() (next_uop[-1].oparg)
 
index 11e2a1fe85d51dda8192049d97d448050019db5c..a18284d89ab42cb17f3a9f0609cbd8595b74a87b 100644 (file)
         case _TO_BOOL_BOOL: {
             PyObject *value;
             value = stack_pointer[-1];
-            if (!PyBool_Check(value)) goto deoptimize;
+            if (!PyBool_Check(value)) goto side_exit;
             STAT_INC(TO_BOOL, hit);
             break;
         }
             PyObject *value;
             PyObject *res;
             value = stack_pointer[-1];
-            if (!PyLong_CheckExact(value)) goto deoptimize;
+            if (!PyLong_CheckExact(value)) goto side_exit;
             STAT_INC(TO_BOOL, hit);
             if (_PyLong_IsZero((PyLongObject *)value)) {
                 assert(_Py_IsImmortal(value));
             PyObject *value;
             PyObject *res;
             value = stack_pointer[-1];
-            if (!PyList_CheckExact(value)) goto deoptimize;
+            if (!PyList_CheckExact(value)) goto side_exit;
             STAT_INC(TO_BOOL, hit);
             res = Py_SIZE(value) ? Py_True : Py_False;
             Py_DECREF(value);
             PyObject *res;
             value = stack_pointer[-1];
             // This one is a bit weird, because we expect *some* failures:
-            if (!Py_IsNone(value)) goto deoptimize;
+            if (!Py_IsNone(value)) goto side_exit;
             STAT_INC(TO_BOOL, hit);
             res = Py_False;
             stack_pointer[-1] = res;
             PyObject *value;
             PyObject *res;
             value = stack_pointer[-1];
-            if (!PyUnicode_CheckExact(value)) goto deoptimize;
+            if (!PyUnicode_CheckExact(value)) goto side_exit;
             STAT_INC(TO_BOOL, hit);
             if (value == &_Py_STR(empty)) {
                 assert(_Py_IsImmortal(value));
             uint32_t version = (uint32_t)CURRENT_OPERAND();
             // This one is a bit weird, because we expect *some* failures:
             assert(version);
-            if (Py_TYPE(value)->tp_version_tag != version) goto deoptimize;
+            if (Py_TYPE(value)->tp_version_tag != version) goto side_exit;
             STAT_INC(TO_BOOL, hit);
             Py_DECREF(value);
             res = Py_True;
             PyObject *left;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (!PyLong_CheckExact(left)) goto deoptimize;
-            if (!PyLong_CheckExact(right)) goto deoptimize;
+            if (!PyLong_CheckExact(left)) goto side_exit;
+            if (!PyLong_CheckExact(right)) goto side_exit;
             break;
         }
 
             PyObject *left;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (!PyFloat_CheckExact(left)) goto deoptimize;
-            if (!PyFloat_CheckExact(right)) goto deoptimize;
+            if (!PyFloat_CheckExact(left)) goto side_exit;
+            if (!PyFloat_CheckExact(right)) goto side_exit;
             break;
         }
 
             PyObject *left;
             right = stack_pointer[-1];
             left = stack_pointer[-2];
-            if (!PyUnicode_CheckExact(left)) goto deoptimize;
-            if (!PyUnicode_CheckExact(right)) goto deoptimize;
+            if (!PyUnicode_CheckExact(left)) goto side_exit;
+            if (!PyUnicode_CheckExact(right)) goto side_exit;
             break;
         }
 
             uint32_t type_version = (uint32_t)CURRENT_OPERAND();
             PyTypeObject *tp = Py_TYPE(owner);
             assert(type_version != 0);
-            if (tp->tp_version_tag != type_version) goto deoptimize;
+            if (tp->tp_version_tag != type_version) goto side_exit;
             break;
         }
 
             break;
         }
 
-        /* _JUMP_BACKWARD is not a viable micro-op for tier 2 */
-
         /* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */
 
         /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */
             PyObject *flag;
             flag = stack_pointer[-1];
             stack_pointer += -1;
-            if (!Py_IsTrue(flag)) goto deoptimize;
+            if (!Py_IsTrue(flag)) goto side_exit;
             assert(Py_IsTrue(flag));
             break;
         }
             PyObject *flag;
             flag = stack_pointer[-1];
             stack_pointer += -1;
-            if (!Py_IsFalse(flag)) goto deoptimize;
+            if (!Py_IsFalse(flag)) goto side_exit;
             assert(Py_IsFalse(flag));
             break;
         }
             stack_pointer += -1;
             if (!Py_IsNone(val)) {
                 Py_DECREF(val);
-                if (1) goto deoptimize;
+                if (1) goto side_exit;
             }
             break;
         }
             PyObject *val;
             val = stack_pointer[-1];
             stack_pointer += -1;
-            if (Py_IsNone(val)) goto deoptimize;
+            if (Py_IsNone(val)) goto side_exit;
             Py_DECREF(val);
             break;
         }
 
         case _JUMP_TO_TOP: {
-            next_uop = current_executor->trace;
+            #ifndef _Py_JIT
+            next_uop = &current_executor->trace[1];
+            #endif
             CHECK_EVAL_BREAKER();
             break;
         }
 
         case _EXIT_TRACE: {
             TIER_TWO_ONLY
-            if (1) goto deoptimize;
+            if (1) goto side_exit;
             break;
         }
 
             break;
         }
 
+        case _COLD_EXIT: {
+            oparg = CURRENT_OPARG();
+            TIER_TWO_ONLY
+            _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor;
+            _PyExitData *exit = &previous->exits[oparg];
+            exit->temperature++;
+            PyCodeObject *code = _PyFrame_GetCode(frame);
+            _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target;
+            if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) {
+                GOTO_TIER_ONE(target);
+            }
+            _PyExecutorObject *executor;
+            if (target->op.code == ENTER_EXECUTOR) {
+                executor = code->co_executors->executors[target->op.arg];
+                Py_INCREF(executor);
+            } else {
+                int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor);
+                if (optimized <= 0) {
+                    int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold;
+                    exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp;
+                    if (optimized < 0) {
+                        Py_DECREF(previous);
+                        tstate->previous_executor = Py_None;
+                        if (1) goto error_tier_two;
+                    }
+                    GOTO_TIER_ONE(target);
+                }
+            }
+            /* We need two references. One to store in exit->executor and
+             * one to keep the executor alive when executing. */
+            Py_INCREF(executor);
+            exit->executor = executor;
+            GOTO_TIER_TWO(executor);
+            break;
+        }
+
+        case _START_EXECUTOR: {
+            PyObject *executor = (PyObject *)CURRENT_OPERAND();
+            TIER_TWO_ONLY
+            Py_DECREF(tstate->previous_executor);
+            tstate->previous_executor = NULL;
+            #ifndef _Py_JIT
+            current_executor = (_PyExecutorObject*)executor;
+            #endif
+            break;
+        }
+
+        case _FATAL_ERROR: {
+            TIER_TWO_ONLY
+            assert(0);
+            Py_FatalError("Fatal error uop executed.");
+            break;
+        }
+
         case _CHECK_VALIDITY_AND_SET_IP: {
             PyObject *instr_ptr = (PyObject *)CURRENT_OPERAND();
             TIER_TWO_ONLY
index 6c19adc60c690f3c95b9bbd602c58780a02b76f7..a520d042a4aa1e45475fd9ecae8786cabe727d55 100644 (file)
             INSTRUCTION_STATS(CACHE);
             TIER_ONE_ONLY
             assert(0 && "Executing a cache.");
-            Py_UNREACHABLE();
+            Py_FatalError("Executing a cache.");
+            DISPATCH();
         }
 
         TARGET(CALL) {
             TIER_ONE_ONLY
             CHECK_EVAL_BREAKER();
             PyCodeObject *code = _PyFrame_GetCode(frame);
-            current_executor = code->co_executors->executors[oparg & 255];
-            assert(current_executor->vm_data.index == INSTR_OFFSET() - 1);
-            assert(current_executor->vm_data.code == code);
-            assert(current_executor->vm_data.valid);
-            Py_INCREF(current_executor);
-            GOTO_TIER_TWO();
+            _PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
+            assert(executor->vm_data.index == INSTR_OFFSET() - 1);
+            assert(executor->vm_data.code == code);
+            assert(executor->vm_data.valid);
+            assert(tstate->previous_executor == NULL);
+            tstate->previous_executor = Py_None;
+            Py_INCREF(executor);
+            GOTO_TIER_TWO(executor);
             DISPATCH();
         }
 
             next_instr += 2;
             INSTRUCTION_STATS(JUMP_BACKWARD);
             /* Skip 1 cache entry */
+            TIER_ONE_ONLY
             CHECK_EVAL_BREAKER();
             assert(oparg <= INSTR_OFFSET());
             JUMPBY(-oparg);
                     oparg >>= 8;
                     start--;
                 }
-                int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer);
+                _PyExecutorObject *executor;
+                int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor);
                 if (optimized < 0) goto error;
                 if (optimized) {
-                    // Rewind and enter the executor:
-                    assert(start->op.code == ENTER_EXECUTOR);
-                    next_instr = start;
-                    this_instr[1].cache &= OPTIMIZER_BITS_MASK;
+                    assert(tstate->previous_executor == NULL);
+                    tstate->previous_executor = Py_None;
+                    GOTO_TIER_TWO(executor);
                 }
                 else {
                     int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK;
             INSTRUCTION_STATS(RESERVED);
             TIER_ONE_ONLY
             assert(0 && "Executing RESERVED instruction.");
-            Py_UNREACHABLE();
+            Py_FatalError("Executing RESERVED instruction.");
+            DISPATCH();
         }
 
         TARGET(RESUME) {
index 22949c082da05a2e67338bf752d480a001e1998d..839414bd81067786d3cdbd807bf58b95928f3b0d 100644 (file)
@@ -300,13 +300,13 @@ emit(const StencilGroup *group, uint64_t patches[])
 
 // Compiles executor in-place. Don't forget to call _PyJIT_Free later!
 int
-_PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length)
+_PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length)
 {
     // Loop once to find the total compiled size:
     size_t code_size = 0;
     size_t data_size = 0;
     for (size_t i = 0; i < length; i++) {
-        _PyUOpInstruction *instruction = &trace[i];
+        _PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
         const StencilGroup *group = &stencil_groups[instruction->opcode];
         code_size += group->code.body_size;
         data_size += group->data.body_size;
@@ -323,8 +323,13 @@ _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t len
     // Loop again to emit the code:
     char *code = memory;
     char *data = memory + code_size;
+    char *top = code;
+    if (trace[0].opcode == _START_EXECUTOR) {
+        // Don't want to execute this more than once:
+        top += stencil_groups[_START_EXECUTOR].code.body_size;
+    }
     for (size_t i = 0; i < length; i++) {
-        _PyUOpInstruction *instruction = &trace[i];
+        _PyUOpInstruction *instruction = (_PyUOpInstruction *)&trace[i];
         const StencilGroup *group = &stencil_groups[instruction->opcode];
         // Think of patches as a dictionary mapping HoleValue to uint64_t:
         uint64_t patches[] = GET_PATCHES();
@@ -335,7 +340,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t len
         patches[HoleValue_OPARG] = instruction->oparg;
         patches[HoleValue_OPERAND] = instruction->operand;
         patches[HoleValue_TARGET] = instruction->target;
-        patches[HoleValue_TOP] = (uint64_t)memory;
+        patches[HoleValue_TOP] = (uint64_t)top;
         patches[HoleValue_ZERO] = 0;
         emit(group, patches);
         code += group->code.body_size;
index efa19680c9b1f30aaf9b628e9f7e64a54b32deb2..acc1d545d2b8adf8e20b706d4dbe7034a499088a 100644 (file)
@@ -2,6 +2,7 @@
 #include "opcode.h"
 #include "pycore_interp.h"
 #include "pycore_bitutils.h"        // _Py_popcount32()
+#include "pycore_object.h"          // _PyObject_GC_UNTRACK()
 #include "pycore_opcode_metadata.h" // _PyOpcode_OpName[]
 #include "pycore_opcode_utils.h"  // MAX_REAL_OPCODE
 #include "pycore_optimizer.h"     // _Py_uop_analyze_and_optimize()
@@ -128,10 +129,11 @@ static _PyOptimizerObject _PyOptimizer_Default = {
     .optimize = never_optimize,
     .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
     .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
+    .side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD,
 };
 
 static uint32_t
-shift_and_offset_threshold(uint16_t threshold)
+shift_and_offset_threshold(uint32_t threshold)
 {
     return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15);
 }
@@ -140,41 +142,74 @@ _PyOptimizerObject *
 PyUnstable_GetOptimizer(void)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (interp->optimizer == &_PyOptimizer_Default) {
-        return NULL;
-    }
     assert(interp->optimizer_backedge_threshold ==
            shift_and_offset_threshold(interp->optimizer->backedge_threshold));
     assert(interp->optimizer_resume_threshold ==
            shift_and_offset_threshold(interp->optimizer->resume_threshold));
+    if (interp->optimizer == &_PyOptimizer_Default) {
+        return NULL;
+    }
     Py_INCREF(interp->optimizer);
     return interp->optimizer;
 }
 
+static _PyExecutorObject *
+make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *dependencies);
+
+static int
+init_cold_exit_executor(_PyExecutorObject *executor, int oparg);
+
+static int cold_exits_initialized = 0;
+static _PyExecutorObject COLD_EXITS[UOP_MAX_TRACE_LENGTH] = { 0 };
+
+static const _PyBloomFilter EMPTY_FILTER = { 0 };
+
 _PyOptimizerObject *
 _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
 {
     if (optimizer == NULL) {
         optimizer = &_PyOptimizer_Default;
     }
+    else if (cold_exits_initialized == 0) {
+        cold_exits_initialized = 1;
+        for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) {
+            if (init_cold_exit_executor(&COLD_EXITS[i], i)) {
+                return NULL;
+            }
+        }
+    }
     _PyOptimizerObject *old = interp->optimizer;
+    if (old == NULL) {
+        old = &_PyOptimizer_Default;
+    }
     Py_INCREF(optimizer);
     interp->optimizer = optimizer;
     interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold);
     interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold);
+    interp->optimizer_side_threshold = optimizer->side_threshold;
+    if (optimizer == &_PyOptimizer_Default) {
+        assert(interp->optimizer_backedge_threshold > (1 << 16));
+        assert(interp->optimizer_resume_threshold > (1 << 16));
+    }
     return old;
 }
 
-void
+int
 PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
     _PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
-    Py_DECREF(old);
+    Py_XDECREF(old);
+    return old == NULL ? -1 : 0;
 }
 
+/* Returns 1 if optimized, 0 if not optimized, and -1 for an error.
+ * If optimized, *executor_ptr contains a new reference to the executor
+ */
 int
-_PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer)
+_PyOptimizer_Optimize(
+    _PyInterpreterFrame *frame, _Py_CODEUNIT *start,
+    PyObject **stack_pointer, _PyExecutorObject **executor_ptr)
 {
     PyCodeObject *code = (PyCodeObject *)frame->f_executable;
     assert(PyCode_Check(code));
@@ -183,12 +218,11 @@ _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject
         return 0;
     }
     _PyOptimizerObject *opt = interp->optimizer;
-    _PyExecutorObject *executor = NULL;
-    int err = opt->optimize(opt, frame, start, &executor, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
+    int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
     if (err <= 0) {
-        assert(executor == NULL);
         return err;
     }
+    assert(*executor_ptr != NULL);
     int index = get_index_for_executor(code, start);
     if (index < 0) {
         /* Out of memory. Don't raise and assume that the
@@ -197,11 +231,11 @@ _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject
          * If an optimizer has already produced an executor,
          * it might get confused by the executor disappearing,
          * but there is not much we can do about that here. */
-        Py_DECREF(executor);
+        Py_DECREF(*executor_ptr);
         return 0;
     }
-    insert_executor(code, start, index, executor);
-    Py_DECREF(executor);
+    insert_executor(code, start, index, *executor_ptr);
+    assert((*executor_ptr)->vm_data.valid);
     return 1;
 }
 
@@ -237,11 +271,12 @@ static PyMethodDef executor_methods[] = {
 
 static void
 uop_dealloc(_PyExecutorObject *self) {
+    _PyObject_GC_UNTRACK(self);
     _Py_ExecutorClear(self);
 #ifdef _Py_JIT
     _PyJIT_Free(self);
 #endif
-    PyObject_Free(self);
+    PyObject_GC_Del(self);
 }
 
 const char *
@@ -253,7 +288,7 @@ _PyUOpName(int index)
 static Py_ssize_t
 uop_len(_PyExecutorObject *self)
 {
-    return Py_SIZE(self);
+    return self->code_size;
 }
 
 static PyObject *
@@ -292,15 +327,34 @@ PySequenceMethods uop_as_sequence = {
     .sq_item = (ssizeargfunc)uop_item,
 };
 
+static int
+executor_clear(PyObject *o)
+{
+    _Py_ExecutorClear((_PyExecutorObject *)o);
+    return 0;
+}
+
+static int
+executor_traverse(PyObject *o, visitproc visit, void *arg)
+{
+    _PyExecutorObject *executor = (_PyExecutorObject *)o;
+    for (uint32_t i = 0; i < executor->exit_count; i++) {
+        Py_VISIT(executor->exits[i].executor);
+    }
+    return 0;
+}
+
 PyTypeObject _PyUOpExecutor_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     .tp_name = "uop_executor",
-    .tp_basicsize = offsetof(_PyExecutorObject, trace),
-    .tp_itemsize = sizeof(_PyUOpInstruction),
-    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
+    .tp_basicsize = offsetof(_PyExecutorObject, exits),
+    .tp_itemsize = 1,
+    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC,
     .tp_dealloc = (destructor)uop_dealloc,
     .tp_as_sequence = &uop_as_sequence,
     .tp_methods = executor_methods,
+    .tp_traverse = executor_traverse,
+    .tp_clear = executor_clear,
 };
 
 /* TO DO -- Generate these tables */
@@ -324,6 +378,7 @@ BRANCH_TO_GUARD[4][2] = {
     [POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP,
 };
 
+
 #define CONFIDENCE_RANGE 1000
 #define CONFIDENCE_CUTOFF 333
 
@@ -726,9 +781,10 @@ done:
  * NOPs are excluded from the count.
 */
 static int
-compute_used(_PyUOpInstruction *buffer, uint32_t *used)
+compute_used(_PyUOpInstruction *buffer, uint32_t *used, int *exit_count_ptr)
 {
     int count = 0;
+    int exit_count = 0;
     SET_BIT(used, 0);
     for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) {
         if (!BIT_IS_SET(used, i)) {
@@ -736,6 +792,9 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used)
         }
         count++;
         int opcode = buffer[i].opcode;
+        if (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) {
+            exit_count++;
+        }
         if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
             continue;
         }
@@ -751,44 +810,76 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used)
             UNSET_BIT(used, i);
         }
     }
+    *exit_count_ptr = exit_count;
     return count;
 }
 
+/* Executor side exits */
+
+static _PyExecutorObject *
+allocate_executor(int exit_count, int length)
+{
+    int size = exit_count*sizeof(_PyExitData) + length*sizeof(_PyUOpInstruction);
+    _PyExecutorObject *res = PyObject_GC_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, size);
+    if (res == NULL) {
+        return NULL;
+    }
+    res->trace = (_PyUOpInstruction *)(res->exits + exit_count);
+    res->code_size = length;
+    res->exit_count = exit_count;
+    return res;
+}
+
 /* Makes an executor from a buffer of uops.
  * Account for the buffer having gaps and NOPs by computing a "used"
  * bit vector and only copying the used uops. Here "used" means reachable
  * and not a NOP.
  */
 static _PyExecutorObject *
-make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
+make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *dependencies)
 {
     uint32_t used[(UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 };
-    int length = compute_used(buffer, used);
-    _PyExecutorObject *executor = PyObject_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, length);
+    int exit_count;
+    int length = compute_used(buffer, used, &exit_count);
+    _PyExecutorObject *executor = allocate_executor(exit_count, length+1);
     if (executor == NULL) {
         return NULL;
     }
-    int dest = length - 1;
+    /* Initialize exits */
+    for (int i = 0; i < exit_count; i++) {
+        executor->exits[i].executor = &COLD_EXITS[i];
+        executor->exits[i].temperature = 0;
+    }
+    int next_exit = exit_count-1;
+    _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
     /* Scan backwards, so that we see the destinations of jumps before the jumps themselves. */
     for (int i = UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) {
         if (!BIT_IS_SET(used, i)) {
             continue;
         }
-        executor->trace[dest] = buffer[i];
+        *dest = buffer[i];
         int opcode = buffer[i].opcode;
         if (opcode == _POP_JUMP_IF_FALSE ||
             opcode == _POP_JUMP_IF_TRUE)
         {
             /* The oparg of the target will already have been set to its new offset */
-            int oparg = executor->trace[dest].oparg;
-            executor->trace[dest].oparg = buffer[oparg].oparg;
+            int oparg = dest->oparg;
+            dest->oparg = buffer[oparg].oparg;
+        }
+        if (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) {
+            executor->exits[next_exit].target = buffer[i].target;
+            dest->exit_index = next_exit;
+            next_exit--;
         }
         /* Set the oparg to be the destination offset,
          * so that we can set the oparg of earlier jumps correctly. */
-        buffer[i].oparg = dest;
+        buffer[i].oparg = (uint16_t)(dest - executor->trace);
         dest--;
     }
-    assert(dest == -1);
+    assert(next_exit == -1);
+    assert(dest == executor->trace);
+    dest->opcode = _START_EXECUTOR;
+    dest->operand = (uintptr_t)executor;
     _Py_ExecutorInit(executor, dependencies);
 #ifdef Py_DEBUG
     char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
@@ -811,14 +902,40 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
 #ifdef _Py_JIT
     executor->jit_code = NULL;
     executor->jit_size = 0;
-    if (_PyJIT_Compile(executor, executor->trace, Py_SIZE(executor))) {
+    if (_PyJIT_Compile(executor, executor->trace, length+1)) {
         Py_DECREF(executor);
         return NULL;
     }
 #endif
+    _PyObject_GC_TRACK(executor);
     return executor;
 }
 
+static int
+init_cold_exit_executor(_PyExecutorObject *executor, int oparg)
+{
+    _Py_SetImmortal(executor);
+    Py_SET_TYPE(executor, &_PyUOpExecutor_Type);
+    executor->trace = (_PyUOpInstruction *)executor->exits;
+    executor->code_size = 1;
+    executor->exit_count = 0;
+    _PyUOpInstruction *inst = (_PyUOpInstruction *)&executor->trace[0];
+    inst->opcode = _COLD_EXIT;
+    inst->oparg = oparg;
+    executor->vm_data.valid = true;
+    for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
+        assert(executor->vm_data.bloom.bits[i] == 0);
+    }
+#ifdef _Py_JIT
+    executor->jit_code = NULL;
+    executor->jit_size = 0;
+    if (_PyJIT_Compile(executor, executor->trace, 1)) {
+        return -1;
+    }
+#endif
+    return 0;
+}
+
 static int
 uop_optimize(
     _PyOptimizerObject *self,
@@ -880,13 +997,15 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
     opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
     // Need a few iterations to settle specializations,
     // and to ammortize the cost of optimization.
+    opt->side_threshold = 16;
     opt->backedge_threshold = 16;
     return (PyObject *)opt;
 }
 
 static void
 counter_dealloc(_PyExecutorObject *self) {
-    PyObject *opt = (PyObject *)self->trace[0].operand;
+    /* The optimizer is the operand of the second uop. */
+    PyObject *opt = (PyObject *)self->trace[1].operand;
     Py_DECREF(opt);
     uop_dealloc(self);
 }
@@ -894,11 +1013,13 @@ counter_dealloc(_PyExecutorObject *self) {
 PyTypeObject _PyCounterExecutor_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     .tp_name = "counting_executor",
-    .tp_basicsize = offsetof(_PyExecutorObject, trace),
-    .tp_itemsize = sizeof(_PyUOpInstruction),
-    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
+    .tp_basicsize = offsetof(_PyExecutorObject, exits),
+    .tp_itemsize = 1,
+    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC,
     .tp_dealloc = (destructor)counter_dealloc,
     .tp_methods = executor_methods,
+    .tp_traverse = executor_traverse,
+    .tp_clear = executor_clear,
 };
 
 static int
@@ -926,9 +1047,7 @@ counter_optimize(
         { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
         { .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)) }
     };
-    _PyBloomFilter empty;
-    _Py_BloomFilter_Init(&empty);
-    _PyExecutorObject *executor = make_executor_from_uops(buffer, &empty);
+    _PyExecutorObject *executor = make_executor_from_uops(buffer, &EMPTY_FILTER);
     if (executor == NULL) {
         return -1;
     }
@@ -968,6 +1087,7 @@ PyUnstable_Optimizer_NewCounter(void)
     }
     opt->base.optimize = counter_optimize;
     opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
+    opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD;
     opt->base.backedge_threshold = 0;
     opt->count = 0;
     return (PyObject *)opt;
@@ -1091,9 +1211,6 @@ link_executor(_PyExecutorObject *executor)
 static void
 unlink_executor(_PyExecutorObject *executor)
 {
-    if (!executor->vm_data.valid) {
-        return;
-    }
     _PyExecutorLinkListNode *links = &executor->vm_data.links;
     _PyExecutorObject *next = links->next;
     _PyExecutorObject *prev = links->previous;
@@ -1114,7 +1231,7 @@ unlink_executor(_PyExecutorObject *executor)
 
 /* This must be called by optimizers before using the executor */
 void
-_Py_ExecutorInit(_PyExecutorObject *executor, _PyBloomFilter *dependency_set)
+_Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_set)
 {
     executor->vm_data.valid = true;
     for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
@@ -1127,11 +1244,19 @@ _Py_ExecutorInit(_PyExecutorObject *executor, _PyBloomFilter *dependency_set)
 void
 _Py_ExecutorClear(_PyExecutorObject *executor)
 {
+    if (!executor->vm_data.valid) {
+        return;
+    }
     unlink_executor(executor);
     PyCodeObject *code = executor->vm_data.code;
     if (code == NULL) {
         return;
     }
+    for (uint32_t i = 0; i < executor->exit_count; i++) {
+        Py_DECREF(executor->exits[i].executor);
+        executor->exits[i].executor = &COLD_EXITS[i];
+        executor->exits[i].temperature = INT16_MIN;
+    }
     _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
     assert(instruction->op.code == ENTER_EXECUTOR);
     int index = instruction->op.arg;
index b354c033ae77274d13ed1ad42132278709b4077c..7b537af8c87697aca9845c653abab244cd975bac 100644 (file)
@@ -1261,7 +1261,9 @@ init_interp_main(PyThreadState *tstate)
             if (opt == NULL) {
                 return _PyStatus_ERR("can't initialize optimizer");
             }
-            PyUnstable_SetOptimizer((_PyOptimizerObject *)opt);
+            if (PyUnstable_SetOptimizer((_PyOptimizerObject *)opt)) {
+                return _PyStatus_ERR("can't initialize optimizer");
+            }
             Py_DECREF(opt);
         }
     }
index c2ccc276449d4f36d7f456e2b7fbbe6da4d0b2b4..3484beab7aaed4937c663917767a54f4d3de5659 100644 (file)
@@ -782,6 +782,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
     }
 
     _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL);
+    assert(old != NULL);
     Py_DECREF(old);
 
     /* It is possible that any of the objects below have a finalizer
@@ -1346,6 +1347,7 @@ init_threadstate(_PyThreadStateImpl *_tstate,
     tstate->datastack_top = NULL;
     tstate->datastack_limit = NULL;
     tstate->what_event = -1;
+    tstate->previous_executor = NULL;
 
 #ifdef Py_GIL_DISABLED
     // Initialize biased reference counting inter-thread queue
diff --git a/Python/tier2_engine.md b/Python/tier2_engine.md
new file mode 100644 (file)
index 0000000..df9f6c1
--- /dev/null
@@ -0,0 +1,150 @@
+# The tier 2 execution engine
+
+## General idea
+
+When execution in tier 1 becomes "hot", that is, when the counter for that
+point in the code reaches some threshold, we create an executor and execute
+that instead of the tier 1 bytecode.
+
+Since each executor must exit, we also track the "hotness" of those
+exits and attach new executors to those exits.
+
+As the program executes, and the hot parts of the program get optimized,
+a graph of executors forms.
+
+## Superblocks and Executors
+
+Once a point in the code has become hot enough, we want to optimize it.
+Starting from that point we project the likely path of execution,
+using information gathered by tier 1 to guide that projection to
+form a "superblock", a mostly linear sequence of micro-ops.
+Although mostly linear, it may include a single loop.
+
+We then optimize this superblock to form an optimized superblock,
+which is equivalent but more efficient.
+
+A superblock is a representation of the code we want to execute,
+but it is not in executable form.
+The executable form is known as an executor.
+
+Executors are semantically equivalent to the superblock they are
+created from, but are in a form that can be executed efficiently.
+
+There are two execution engines for executors, and two types of executors:
+* The hardware, which runs machine code executors created by the JIT compiler.
+* The tier 2 interpreter, which runs bytecode executors.
+
+It would be very wasteful to support both a tier 2 interpreter and
+JIT compiler in the same process.
+For now, we will make the choice of engine a configuration option,
+but we could make it a command line option in the future if that would prove useful.
+
+
+### Tier 2 Interpreter
+
+For platforms without a JIT and for testing, we need an interpreter
+for executors. It is similar in design to the tier 1 interpreter, but has a
+different instruction set, and does not adapt.
+
+### JIT compiler
+
+The JIT compiler converts superblocks into machine code executors.
+These have identical behavior to interpreted executors, except that
+they consume more memory for the generated machine code and are a lot faster.
+
+## Transferring control
+
+There are three types of control transfer that we need to consider:
+* Tier 1 to tier 2
+* Tier 2 to tier 1
+* One executor to another within tier 2
+
+Since we expect the graph of executors to span most of the hot
+part of the program, transfers from one executor to another should
+be the most common.
+Therefore, we want to make those transfers fast.
+
+### Tier 2 to tier 2
+
+#### Cold exits
+
+All side exits start cold and most stay cold, but a few become
+hot. We want to keep the memory consumption small for the many
+cold exits, but those that become hot need to be fast.
+However, we cannot know in advance which will be which.
+
+So that tier 2 to tier 2 transfers are fast for hot exits,
+exits must be implemented as executors. In order to patch
+executor exits when they get hot, a pointer to the current
+executor must be passed to the exit executor.
+
+#### Handling reference counts
+
+There must be an implicit reference to the currently executing
+executor, otherwise it might be freed.
+Consequently, we must increment the reference count of an
+executor just before executing it, and decrement it just after
+executing it.
+
+We want to minimize the amount of data that is passed from
+one executor to the next. In the JIT, this reduces the number
+of arguments in the tailcall, freeing up registers for other uses.
+It is less important in the interpreter, but following the same
+design as the JIT simplifies debugging and is good for performance.
+
+Provided that we incref the new executor before executing it, we
+can jump directly to the code of the executor, without needing
+to pass a reference to that executor object.
+However, we do need a reference to the previous executor,
+so that it can be decref'd and for handling of cold exits.
+To avoid messing up the JIT's register allocation, we pass a
+reference to the previous executor in the thread state's
+`previous_executor` field.
+
+#### The interpreter
+
+The tier 2 interpreter has a variable `current_executor` which
+points to the currently live executor. When transferring from executor
+`A` to executor `B` we do the following:
+(Initially `current_executor` points to `A`, and the refcount of
+`A` is elevated by one)
+
+1. Set the instruction pointer to start at the beginning of `B`
+2. Increment the reference count of `B`
+3. Start executing `B`
+
+We also make the first instruction in `B` do the following:
+1. Set `current_executor` to point to `B`
+2. Decrement the reference count of `A` (`A` is referenced by `tstate->previous_executor`)
+
+The net effect of the above is to safely decrement the refcount of `A`,
+increment the refcount of `B` and set `current_executor` to point to `B`.
+
+#### In the JIT
+
+Transferring control from one executor to another is done via tailcalls.
+
+The compiled executor should do the same, except that there is no local
+variable `current_executor`.
+
+### Tier 1 to tier 2
+
+Since the executor doesn't know if the previous code was tier 1 or tier 2,
+we need to make a transfer from tier 1 to tier 2 look like a tier 2 to tier 2
+transfer to the executor.
+
+We can then perform a tier 1 to tier 2 transfer by setting `current_executor`
+to `None`, and then performing a tier 2 to tier 2 transfer as above.
+
+### Tier 2 to tier 1
+
+Each micro-op that might exit to tier 1 contains a `target` value,
+which is the offset of the tier 1 instruction to exit to in the
+current code object.
+
+## Counters
+
+TO DO.
+The implementation will change soon, so there is no point in
+documenting it until then.
+
index be2fbb9106fffc5ca66b5b8aadd797f44178bb0a..98f0bdca01f01d56f96897ca5ed841f00a54b861 100644 (file)
             break;
         }
 
-        /* _JUMP_BACKWARD is not a viable micro-op for tier 2 */
-
         /* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */
 
         /* _POP_JUMP_IF_TRUE is not a viable micro-op for tier 2 */
             break;
         }
 
+        case _COLD_EXIT: {
+            break;
+        }
+
+        case _START_EXECUTOR: {
+            break;
+        }
+
+        case _FATAL_ERROR: {
+            break;
+        }
+
         case _CHECK_VALIDITY_AND_SET_IP: {
             break;
         }
index 61cd41ea8f31c193ffcba648cab24e89c4de5579..8482bf849aaacd314d8a82141c8301ffdb777df0 100644 (file)
@@ -321,6 +321,7 @@ MAX_SIZES = {
     _abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
     _abs('Objects/typeobject.c'): (35_000, 200),
     _abs('Python/compile.c'): (20_000, 500),
+    _abs('Python/optimizer.c'): (100_000, 5_000),
     _abs('Python/parking_lot.c'): (40_000, 1000),
     _abs('Python/pylifecycle.c'): (500_000, 5000),
     _abs('Python/pystate.c'): (500_000, 5000),
index 14bcd85b9eae59a4ee624a106705d2f60bdebc6f..94be3375849597791a579b6f125a3bca8f5995ee 100644 (file)
@@ -382,6 +382,11 @@ Python/optimizer.c -       _PyCounterOptimizer_Type        -
 Python/optimizer.c     -       _PyUOpExecutor_Type     -
 Python/optimizer.c     -       _PyUOpOptimizer_Type    -
 Python/optimizer.c     -       _PyOptimizer_Default    -
+Python/optimizer.c     -       _ColdExit_Type  -
+Python/optimizer.c     -       COLD_EXITS      -
+Python/optimizer.c     -       Py_FatalErrorExecutor   -
+Python/optimizer.c     -       EMPTY_FILTER    -
+Python/optimizer.c     -       cold_exits_initialized  -
 
 ##-----------------------
 ## test code
index 3497b7fcdf35d37fffb123f4ed1d0db7062a4b23..bcffd75269ac0bf7de9db250cfa60583f6938e00 100644 (file)
@@ -21,6 +21,7 @@ class Properties:
     uses_co_names: bool
     uses_locals: bool
     has_free: bool
+    side_exit: bool
 
     pure: bool
     passthrough: bool
@@ -48,6 +49,7 @@ class Properties:
             uses_co_names=any(p.uses_co_names for p in properties),
             uses_locals=any(p.uses_locals for p in properties),
             has_free=any(p.has_free for p in properties),
+            side_exit=any(p.side_exit for p in properties),
             pure=all(p.pure for p in properties),
             passthrough=all(p.passthrough for p in properties),
         )
@@ -69,6 +71,7 @@ SKIP_PROPERTIES = Properties(
     uses_co_names=False,
     uses_locals=False,
     has_free=False,
+    side_exit=False,
     pure=False,
     passthrough=False,
 )
@@ -269,9 +272,7 @@ def override_error(
 
 
 def convert_stack_item(item: parser.StackEffect) -> StackItem:
-    return StackItem(
-        item.name, item.type, item.cond, (item.size or "1")
-    )
+    return StackItem(item.name, item.type, item.cond, (item.size or "1"))
 
 
 def analyze_stack(op: parser.InstDef) -> StackEffect:
@@ -448,13 +449,24 @@ def compute_properties(op: parser.InstDef) -> Properties:
         or variable_used(op, "PyCell_GET")
         or variable_used(op, "PyCell_SET")
     )
+    deopts_if = variable_used(op, "DEOPT_IF")
+    exits_if = variable_used(op, "EXIT_IF")
+    if deopts_if and exits_if:
+        tkn = op.tokens[0]
+        raise lexer.make_syntax_error(
+            "Op cannot contain both EXIT_IF and DEOPT_IF",
+            tkn.filename,
+            tkn.line,
+            tkn.column,
+            op.name,
+        )
     infallible = is_infallible(op)
-    deopts = variable_used(op, "DEOPT_IF")
     passthrough = stack_effect_only_peeks(op) and infallible
     return Properties(
         escapes=makes_escaping_api_call(op),
         infallible=infallible,
-        deopts=deopts,
+        deopts=deopts_if or exits_if,
+        side_exit=exits_if,
         oparg=variable_used(op, "oparg"),
         jumps=variable_used(op, "JUMPBY"),
         eval_breaker=variable_used(op, "CHECK_EVAL_BREAKER"),
index 2fc2ab115321cf26793070edb8adb531a9b5e584..54ffea71b5350b8e4007bae9fa2188c30c77e802 100644 (file)
@@ -154,6 +154,7 @@ def replace_check_eval_breaker(
 
 
 REPLACEMENT_FUNCTIONS = {
+    "EXIT_IF": replace_deopt,
     "DEOPT_IF": replace_deopt,
     "ERROR_IF": replace_error,
     "DECREF_INPUTS": replace_decrefs,
@@ -205,6 +206,8 @@ def cflags(p: Properties) -> str:
         flags.append("HAS_EVAL_BREAK_FLAG")
     if p.deopts:
         flags.append("HAS_DEOPT_FLAG")
+    if p.side_exit:
+        flags.append("HAS_EXIT_FLAG")
     if not p.infallible:
         flags.append("HAS_ERROR_FLAG")
     if p.escapes:
index 3e9fa3e26daa539e98beac8776479e27a8aa1a27..52dc09e1499671d58b4e537b5d8b3045a582d68a 100644 (file)
@@ -50,6 +50,7 @@ FLAGS = [
     "DEOPT",
     "ERROR",
     "ESCAPES",
+    "EXIT",
     "PURE",
     "PASSTHROUGH",
 ]
index 7897b89b2752a72b3c01e6a58b34238b3959e761..8b4d1646dd5a87f01f358267ae55468cf924f2bf 100644 (file)
@@ -98,9 +98,25 @@ def tier2_replace_deopt(
     out.emit(") goto deoptimize;\n")
 
 
+def tier2_replace_exit_if(
+    out: CWriter,
+    tkn: Token,
+    tkn_iter: Iterator[Token],
+    uop: Uop,
+    unused: Stack,
+    inst: Instruction | None,
+) -> None:
+    out.emit_at("if ", tkn)
+    out.emit(next(tkn_iter))
+    emit_to(out, tkn_iter, "RPAREN")
+    next(tkn_iter)  # Semi colon
+    out.emit(") goto side_exit;\n")
+
+
 TIER2_REPLACEMENT_FUNCTIONS = REPLACEMENT_FUNCTIONS.copy()
 TIER2_REPLACEMENT_FUNCTIONS["ERROR_IF"] = tier2_replace_error
 TIER2_REPLACEMENT_FUNCTIONS["DEOPT_IF"] = tier2_replace_deopt
+TIER2_REPLACEMENT_FUNCTIONS["EXIT_IF"] = tier2_replace_exit_if
 
 
 def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None:
index 12303a550d8879eacd4c7d98c01559751211d224..d79c6efb8f6de4e4a74505dcc2164ce7e97835ac 100644 (file)
         goto LABEL ## _tier_two; \
     } while (0)
 
+#undef GOTO_TIER_TWO
+#define GOTO_TIER_TWO(EXECUTOR) \
+do {  \
+    __attribute__((musttail))                     \
+    return ((jit_func)((EXECUTOR)->jit_code))(frame, stack_pointer, tstate); \
+} while (0)
+
+#undef GOTO_TIER_ONE
+#define GOTO_TIER_ONE(TARGET) \
+do {  \
+    _PyFrame_SetStackPointer(frame, stack_pointer); \
+    return TARGET; \
+} while (0)
+
 #undef LOAD_IP
 #define LOAD_IP(UNUSED) \
     do {                \
@@ -59,7 +73,6 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *
     PATCH_VALUE(_PyExecutorObject *, current_executor, _JIT_EXECUTOR)
     int oparg;
     int opcode = _JIT_OPCODE;
-    _PyUOpInstruction *next_uop;
     // Other stuff we need handy:
     PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG)
     PATCH_VALUE(uint64_t, _operand, _JIT_OPERAND)
@@ -90,9 +103,16 @@ pop_2_error_tier_two:
 pop_1_error_tier_two:
     STACK_SHRINK(1);
 error_tier_two:
-    _PyFrame_SetStackPointer(frame, stack_pointer);
-    return NULL;
+    tstate->previous_executor = (PyObject *)current_executor;
+    GOTO_TIER_ONE(NULL);
 deoptimize:
-    _PyFrame_SetStackPointer(frame, stack_pointer);
-    return _PyCode_CODE(_PyFrame_GetCode(frame)) + _target;
+    tstate->previous_executor = (PyObject *)current_executor;
+    GOTO_TIER_ONE(_PyCode_CODE(_PyFrame_GetCode(frame)) + _target);
+side_exit:
+    {
+        _PyExitData *exit = &current_executor->exits[_target];
+        Py_INCREF(exit->executor);
+        tstate->previous_executor = (PyObject *)current_executor;
+        GOTO_TIER_TWO(exit->executor);
+    }
 }