git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Move to thread state
author: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Fri, 7 Nov 2025 22:02:00 +0000 (22:02 +0000)
committer: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Fri, 7 Nov 2025 22:02:00 +0000 (22:02 +0000)
Include/internal/pycore_interp_structs.h
Include/internal/pycore_tstate.h
Python/bytecodes.c
Python/ceval.c
Python/ceval_macros.h
Python/generated_cases.c.h
Python/optimizer.c
Python/pystate.c
Tools/cases_generator/generators_common.py

index a2b5afa8736c733913c6ac17b669ed33351893bc..39023923c7e2b710d758254c772aaee587bbcf21 100644 (file)
@@ -14,8 +14,6 @@ extern "C" {
 #include "pycore_structs.h"       // PyHamtObject
 #include "pycore_tstate.h"        // _PyThreadStateImpl
 #include "pycore_typedefs.h"      // _PyRuntimeState
-#include "pycore_uop.h"           // struct _PyUOpInstruction
-
 
 #define CODE_MAX_WATCHERS 8
 #define CONTEXT_MAX_WATCHERS 8
@@ -757,36 +755,6 @@ struct _Py_unique_id_pool {
 
 typedef _Py_CODEUNIT *(*_PyJitEntryFuncPtr)(struct _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate);
 
-typedef struct _PyJitTracerInitialState {
-    int stack_depth;
-    int chain_depth;
-    struct _PyExitData *exit;
-    PyCodeObject *code; // Strong
-    PyFunctionObject *func; // Strong
-    _Py_CODEUNIT *start_instr;
-    _Py_CODEUNIT *close_loop_instr;
-    _Py_CODEUNIT *jump_backward_instr;
-} _PyJitTracerInitialState;
-
-typedef struct _PyJitTracerPreviousState {
-    bool dependencies_still_valid;
-    bool instr_is_super;
-    int code_max_size;
-    int code_curr_size;
-    int instr_oparg;
-    int instr_stacklevel;
-    int specialize_counter;
-    _Py_CODEUNIT *instr;
-    PyCodeObject *instr_code; // Strong
-    _PyInterpreterFrame *instr_frame;
-    _PyBloomFilter dependencies;
-} _PyJitTracerPreviousState;
-
-typedef struct _PyJitTracerState {
-    _PyUOpInstruction *code_buffer;
-    _PyJitTracerInitialState initial_state;
-    _PyJitTracerPreviousState prev_state;
-} _PyJitTracerState;
 
 /* PyInterpreterState holds the global state for one of the runtime's
    interpreters.  Typically the initial (main) interpreter is the only one.
@@ -963,7 +931,6 @@ struct _is {
     struct types_state types;
     struct callable_cache callable_cache;
     PyObject *common_consts[NUM_COMMON_CONSTANTS];
-    _PyJitTracerState jit_state;
     bool jit;
     bool compiling;
     struct _PyExecutorObject *executor_list_head;
index bad968428c73a1aac3910464d8a9896d6f0f4c80..04041b273d756061c55049ff912a774539309928 100644 (file)
@@ -12,7 +12,8 @@ extern "C" {
 #include "pycore_freelist_state.h"  // struct _Py_freelists
 #include "pycore_mimalloc.h"        // struct _mimalloc_thread_state
 #include "pycore_qsbr.h"            // struct qsbr
-
+#include "pycore_uop.h"             // struct _PyUOpInstruction
+#include "pycore_structs.h"
 
 #ifdef Py_GIL_DISABLED
 struct _gc_thread_state {
@@ -21,6 +22,39 @@ struct _gc_thread_state {
 };
 #endif
 
+#if _Py_TIER2
+typedef struct _PyJitTracerInitialState {
+    int stack_depth;
+    int chain_depth;
+    struct _PyExitData *exit;
+    PyCodeObject *code; // Strong
+    PyFunctionObject *func; // Strong
+    _Py_CODEUNIT *start_instr;
+    _Py_CODEUNIT *close_loop_instr;
+    _Py_CODEUNIT *jump_backward_instr;
+} _PyJitTracerInitialState;
+
+typedef struct _PyJitTracerPreviousState {
+    bool dependencies_still_valid;
+    bool instr_is_super;
+    int code_max_size;
+    int code_curr_size;
+    int instr_oparg;
+    int instr_stacklevel;
+    int specialize_counter;
+    _Py_CODEUNIT *instr;
+    PyCodeObject *instr_code; // Strong
+    struct _PyInterpreterFrame *instr_frame;
+    _PyBloomFilter dependencies;
+} _PyJitTracerPreviousState;
+
+typedef struct _PyJitTracerState {
+    _PyUOpInstruction *code_buffer;
+    _PyJitTracerInitialState initial_state;
+    _PyJitTracerPreviousState prev_state;
+} _PyJitTracerState;
+#endif
+
 // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
 // PyThreadState fields are exposed as part of the C API, although most fields
 // are intended to be private. The _PyThreadStateImpl fields not exposed.
@@ -75,7 +109,9 @@ typedef struct _PyThreadStateImpl {
 #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
     Py_ssize_t reftotal;  // this thread's total refcount operations
 #endif
-
+#if _Py_TIER2
+    _PyJitTracerState jit_state;
+#endif
 } _PyThreadStateImpl;
 
 #ifdef __cplusplus
index 42fb4170eae72576921119ed902d7e27825227a8..283a18424715b607e93fbe975b04274b24b35950 100644 (file)
@@ -2970,13 +2970,6 @@ dummy_func(
             if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) &&
                 this_instr->op.code == JUMP_BACKWARD_JIT &&
                 next_instr->op.code != ENTER_EXECUTOR) {
-                if (tstate->interp->jit_state.code_buffer == NULL) {
-                    tstate->interp->jit_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
-                    if (tstate->interp->jit_state.code_buffer == NULL) {
-                        // Don't error, just go to next instruction.
-                        DISPATCH();
-                    }
-                }
                 /* Back up over EXTENDED_ARGs so executor is inserted at the correct place */
                 _Py_CODEUNIT *insert_exec_at = this_instr;
                 while (oparg > 255) {
@@ -5673,24 +5666,25 @@ dummy_func(
             }
             // Super instructions. Instruction deopted. There's a mismatch in what the stack expects
             // in the optimizer. So we have to reflect in the trace correctly.
-            if ((tstate->interp->jit_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
+            _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+            if ((_tstate->jit_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
                 opcode == POP_TOP) ||
-                (tstate->interp->jit_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
+                (_tstate->jit_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
                 opcode == STORE_FAST)) {
-                tstate->interp->jit_state.prev_state.instr_is_super = true;
+                _tstate->jit_state.prev_state.instr_is_super = true;
             }
             else {
-                tstate->interp->jit_state.prev_state.instr = next_instr;
+                _tstate->jit_state.prev_state.instr = next_instr;
             }
-            tstate->interp->jit_state.prev_state.specialize_counter = 0;
+            _tstate->jit_state.prev_state.specialize_counter = 0;
             PyCodeObject *prev_code = (PyCodeObject *)Py_NewRef(PyStackRef_AsPyObjectBorrow(frame->f_executable));
-            if (tstate->interp->jit_state.prev_state.instr_code != prev_code) {
-                Py_SETREF(tstate->interp->jit_state.prev_state.instr_code, prev_code);
+            if (_tstate->jit_state.prev_state.instr_code != prev_code) {
+                Py_SETREF(_tstate->jit_state.prev_state.instr_code, prev_code);
             }
 
-            tstate->interp->jit_state.prev_state.instr_frame = frame;
-            tstate->interp->jit_state.prev_state.instr_oparg = oparg;
-            tstate->interp->jit_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
+            _tstate->jit_state.prev_state.instr_frame = frame;
+            _tstate->jit_state.prev_state.instr_oparg = oparg;
+            _tstate->jit_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
             DISPATCH_GOTO_NON_TRACING();
 #else
             Py_FatalError("JIT label executed in non-jit build.");
index ce6493fdd85284544a692aa9b371d15633bb43fa..e6424503e88988067c07ec8201c4144db65da61d 100644 (file)
@@ -1000,16 +1000,17 @@ bail_tracing_and_jit(PyThreadState *tstate, _PyInterpreterFrame *frame)
     if (!_PyErr_Occurred(tstate) && !_is_sys_tracing) {
         err = _PyOptimizer_Optimize(frame, tstate);
     }
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
     // Deal with backoffs
-    _PyExitData *exit = tstate->interp->jit_state.initial_state.exit;
+    _PyExitData *exit = _tstate->jit_state.initial_state.exit;
     if (exit == NULL) {
         // We hold a strong reference to the code object, so the instruction won't be freed.
         if (err <= 0) {
-            _Py_BackoffCounter counter = tstate->interp->jit_state.initial_state.jump_backward_instr[1].counter;
-            tstate->interp->jit_state.initial_state.jump_backward_instr[1].counter = restart_backoff_counter(counter);
+            _Py_BackoffCounter counter = _tstate->jit_state.initial_state.jump_backward_instr[1].counter;
+            _tstate->jit_state.initial_state.jump_backward_instr[1].counter = restart_backoff_counter(counter);
         }
         else {
-            tstate->interp->jit_state.initial_state.jump_backward_instr[1].counter = initial_jump_backoff_counter();
+            _tstate->jit_state.initial_state.jump_backward_instr[1].counter = initial_jump_backoff_counter();
         }
     }
     else {
@@ -1017,7 +1018,7 @@ bail_tracing_and_jit(PyThreadState *tstate, _PyInterpreterFrame *frame)
         // to be valid to access.
         if (err <= 0) {
             // Some opcodes will forever be unchanged. Don't ever bother specializing for them ever again.
-            if (tstate->interp->jit_state.prev_state.instr->op.code == INTERPRETER_EXIT) {
+            if (_tstate->jit_state.prev_state.instr->op.code == INTERPRETER_EXIT) {
                 exit->temperature = initial_unreachable_backoff_counter();
             }
             else {
index 219874ac240bd60c764940972baca3203e1ae877..c1062e9d091d3e1c3bbf444ceb35cd27f56bd61f 100644 (file)
 
 #if (_Py_TAIL_CALL_INTERP || USE_COMPUTED_GOTOS) && _Py_TIER2
 #  define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE)
-#  define IS_JIT_TRACING_MAKING_PROGRESS() (IS_JIT_TRACING() && tstate->interp->jit_state.prev_state.specialize_counter < MAX_SPECIALIZATION_TRIES)
+#  define IS_JIT_TRACING_MAKING_PROGRESS() (IS_JIT_TRACING() && ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter < MAX_SPECIALIZATION_TRIES)
 #  define ENTER_TRACING() \
     DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE;
 #  define LEAVE_TRACING() \
@@ -402,7 +402,7 @@ do {                                                   \
         JUMP_TO_LABEL(error);                          \
     }                                                  \
     if (keep_tracing_bit) { \
-        assert(tstate->interp->jit_state.prev_state.code_curr_size == 2); \
+        assert(((_PyThreadStateImpl *)tstate)->jit_state.prev_state.code_curr_size == 2); \
         ENTER_TRACING(); \
         DISPATCH_NON_TRACING(); \
     } \
index 24c4271c88fab2d4415ab93fbf43115e48775725..a45138e6e6c5b925fc4917cd3ccd471f94e708cc 100644 (file)
@@ -45,7 +45,7 @@
                     _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_Call(callable, next_instr, oparg + !PyStackRef_IsNull(self_or_null));
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_CallKw(callable, next_instr, oparg + !PyStackRef_IsNull(self_or_null));
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_CompareOp(left, right, next_instr, oparg);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_ContainsOp(right, next_instr);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_ForIter(iter, null_or_index, next_instr, oparg);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
                     next_instr = this_instr;
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                 if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) &&
                     this_instr->op.code == JUMP_BACKWARD_JIT &&
                     next_instr->op.code != ENTER_EXECUTOR) {
-                    if (tstate->interp->jit_state.code_buffer == NULL) {
+                    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+                    if (_tstate->jit_state.code_buffer == NULL) {
                         _PyFrame_SetStackPointer(frame, stack_pointer);
-                        tstate->interp->jit_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
+                        _tstate->jit_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
                         stack_pointer = _PyFrame_GetStackPointer(frame);
-                        if (tstate->interp->jit_state.code_buffer == NULL) {
+                        if (_tstate->jit_state.code_buffer == NULL) {
                             DISPATCH();
                         }
                     }
                     _Py_Specialize_LoadAttr(owner, next_instr, name);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_LoadSuperAttr(global_super_st, class_st, next_instr, load_method);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_Send(receiver, next_instr);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_StoreAttr(owner, next_instr, name);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_StoreSubscr(container, sub, next_instr);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_ToBool(value, next_instr);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
                     _Py_Specialize_UnpackSequence(seq, next_instr, oparg);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
                     #if _Py_TIER2
-                    tstate->interp->jit_state.prev_state.specialize_counter++;
+                    ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                     #endif
                     DISPATCH_SAME_OPARG();
                 }
@@ -12360,25 +12361,26 @@ JUMP_TO_LABEL(error);
                 }
                 DISPATCH_GOTO_NON_TRACING();
             }
-            if ((tstate->interp->jit_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
+            _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+            if ((_tstate->jit_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
                  opcode == POP_TOP) ||
-                (tstate->interp->jit_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
+                (_tstate->jit_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
                  opcode == STORE_FAST)) {
-                tstate->interp->jit_state.prev_state.instr_is_super = true;
+                _tstate->jit_state.prev_state.instr_is_super = true;
             }
             else {
-                tstate->interp->jit_state.prev_state.instr = next_instr;
+                _tstate->jit_state.prev_state.instr = next_instr;
             }
-            tstate->interp->jit_state.prev_state.specialize_counter = 0;
+            _tstate->jit_state.prev_state.specialize_counter = 0;
             PyCodeObject *prev_code = (PyCodeObject *)Py_NewRef(PyStackRef_AsPyObjectBorrow(frame->f_executable));
-            if (tstate->interp->jit_state.prev_state.instr_code != prev_code) {
+            if (_tstate->jit_state.prev_state.instr_code != prev_code) {
                 _PyFrame_SetStackPointer(frame, stack_pointer);
-                Py_SETREF(tstate->interp->jit_state.prev_state.instr_code, prev_code);
+                Py_SETREF(_tstate->jit_state.prev_state.instr_code, prev_code);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
             }
-            tstate->interp->jit_state.prev_state.instr_frame = frame;
-            tstate->interp->jit_state.prev_state.instr_oparg = oparg;
-            tstate->interp->jit_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
+            _tstate->jit_state.prev_state.instr_frame = frame;
+            _tstate->jit_state.prev_state.instr_oparg = oparg;
+            _tstate->jit_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
             DISPATCH_GOTO_NON_TRACING();
             #else
             Py_FatalError("JIT label executed in non-jit build.");
index 1e858ca3689ad183d4b6269fa79891ad195b73ef..c38f28e224a8b6da9ca34397cafeb481e63f1378 100644 (file)
@@ -118,14 +118,15 @@ Py_NO_INLINE int
 _PyOptimizer_Optimize(
     _PyInterpreterFrame *frame, PyThreadState *tstate)
 {
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    int chain_depth = _tstate->jit_state.initial_state.chain_depth;
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    int chain_depth = tstate->interp->jit_state.initial_state.chain_depth;
     assert(interp->jit);
     assert(!interp->compiling);
-    assert(tstate->interp->jit_state.initial_state.stack_depth >= 0);
+    assert(_tstate->jit_state.initial_state.stack_depth >= 0);
 #ifndef Py_GIL_DISABLED
     // Trace got stomped on by another thread.
-    if (tstate->interp->jit_state.initial_state.func == NULL) {
+    if (_tstate->jit_state.initial_state.func == NULL) {
         return 0;
     }
     interp->compiling = true;
@@ -135,14 +136,14 @@ _PyOptimizer_Optimize(
     // this is true, since a deopt won't infinitely re-enter the executor:
     chain_depth %= MAX_CHAIN_DEPTH;
     bool progress_needed = chain_depth == 0;
-    PyCodeObject *code = (PyCodeObject *)tstate->interp->jit_state.initial_state.code;
-    _Py_CODEUNIT *start = tstate->interp->jit_state.initial_state.start_instr;
+    PyCodeObject *code = (PyCodeObject *)_tstate->jit_state.initial_state.code;
+    _Py_CODEUNIT *start = _tstate->jit_state.initial_state.start_instr;
     if (progress_needed && !has_space_for_executor(code, start)) {
         interp->compiling = false;
         return 0;
     }
     // One of our dependencies while tracing was invalidated. Not worth compiling.
-    if (!tstate->interp->jit_state.prev_state.dependencies_still_valid) {
+    if (!_tstate->jit_state.prev_state.dependencies_still_valid) {
         interp->compiling = false;
         return 0;
     }
@@ -171,7 +172,7 @@ _PyOptimizer_Optimize(
     else {
         executor->vm_data.code = NULL;
     }
-    _PyExitData *exit = tstate->interp->jit_state.initial_state.exit;
+    _PyExitData *exit = _tstate->jit_state.initial_state.exit;
     if (exit != NULL) {
         exit->executor = executor;
     }
@@ -561,27 +562,21 @@ _PyJit_translate_single_bytecode_to_trace(
         lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
     }
 #endif
-
-    PyCodeObject *old_code = tstate->interp->jit_state.prev_state.instr_code;
-    // Something else finalized the trace. This can happen in multi-threaded scenarios as our trace
-    // addition from bytecode execution to here is not atomic.
-    // Though in GIL builds, the GIL protects the rest.
-    if (old_code == NULL) {
-        return 0;
-    }
-    bool progress_needed = (tstate->interp->jit_state.initial_state.chain_depth % MAX_CHAIN_DEPTH) == 0;
-    _PyBloomFilter *dependencies = &tstate->interp->jit_state.prev_state.dependencies;
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    PyCodeObject *old_code = _tstate->jit_state.prev_state.instr_code;
+    bool progress_needed = (_tstate->jit_state.initial_state.chain_depth % MAX_CHAIN_DEPTH) == 0;
+    _PyBloomFilter *dependencies = &_tstate->jit_state.prev_state.dependencies;
     _Py_BloomFilter_Add(dependencies, old_code);
-    int trace_length = tstate->interp->jit_state.prev_state.code_curr_size;
-    _PyUOpInstruction *trace = tstate->interp->jit_state.code_buffer;
-    int max_length = tstate->interp->jit_state.prev_state.code_max_size;
+    int trace_length = _tstate->jit_state.prev_state.code_curr_size;
+    _PyUOpInstruction *trace = _tstate->jit_state.code_buffer;
+    int max_length = _tstate->jit_state.prev_state.code_max_size;
 
     int is_sys_tracing = (tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL);
     if (is_sys_tracing) {
         goto full;
     }
 
-    _Py_CODEUNIT *this_instr =  tstate->interp->jit_state.prev_state.instr;
+    _Py_CODEUNIT *this_instr =  _tstate->jit_state.prev_state.instr;
     _Py_CODEUNIT *target_instr = this_instr;
     uint32_t target = 0;
 
@@ -591,10 +586,10 @@ _PyJit_translate_single_bytecode_to_trace(
 
     // Rewind EXTENDED_ARG so that we see the whole thing.
     // We must point to the first EXTENDED_ARG when deopting.
-    int oparg = tstate->interp->jit_state.prev_state.instr_oparg;
+    int oparg = _tstate->jit_state.prev_state.instr_oparg;
     int opcode = this_instr->op.code;
     // Failed specialization many times. Deopt!
-    if (tstate->interp->jit_state.prev_state.specialize_counter >= MAX_SPECIALIZATION_TRIES) {
+    if (_tstate->jit_state.prev_state.specialize_counter >= MAX_SPECIALIZATION_TRIES) {
         opcode = _PyOpcode_Deopt[opcode];
     }
     int rewind_oparg = oparg;
@@ -603,7 +598,7 @@ _PyJit_translate_single_bytecode_to_trace(
         target--;
     }
 
-    int old_stack_level = tstate->interp->jit_state.prev_state.instr_stacklevel;
+    int old_stack_level = _tstate->jit_state.prev_state.instr_stacklevel;
 
     // Strange control-flow
     bool has_dynamic_jump_taken = OPCODE_HAS_UNPREDICTABLE_JUMP(opcode) &&
@@ -611,7 +606,7 @@ _PyJit_translate_single_bytecode_to_trace(
 
     /* Special case the first instruction,
     * so that we can guarantee forward progress */
-    if (progress_needed && tstate->interp->jit_state.prev_state.code_curr_size <= 3) {
+    if (progress_needed && _tstate->jit_state.prev_state.code_curr_size <= 3) {
         if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
             opcode = _PyOpcode_Deopt[opcode];
         }
@@ -633,8 +628,8 @@ _PyJit_translate_single_bytecode_to_trace(
 #endif
 
     // Skip over super instructions.
-    if (tstate->interp->jit_state.prev_state.instr_is_super) {
-        tstate->interp->jit_state.prev_state.instr_is_super = false;
+    if (_tstate->jit_state.prev_state.instr_is_super) {
+        _tstate->jit_state.prev_state.instr_is_super = false;
         return 1;
     }
 
@@ -642,13 +637,13 @@ _PyJit_translate_single_bytecode_to_trace(
         goto full;
     }
 
-    if (!tstate->interp->jit_state.prev_state.dependencies_still_valid) {
+    if (!_tstate->jit_state.prev_state.dependencies_still_valid) {
         goto done;
     }
 
     // This happens when a recursive call happens that we can't trace. Such as Python -> C -> Python calls
     // If we haven't guarded the IP, then it's untraceable.
-    if (frame != tstate->interp->jit_state.prev_state.instr_frame && !needs_guard_ip) {
+    if (frame != _tstate->jit_state.prev_state.instr_frame && !needs_guard_ip) {
         DPRINTF(2, "Unsupported: unguardable jump taken\n");
         goto unsupported;
     }
@@ -748,9 +743,9 @@ _PyJit_translate_single_bytecode_to_trace(
             _Py_FALLTHROUGH;
         case JUMP_BACKWARD_NO_INTERRUPT:
         {
-            if ((next_instr != tstate->interp->jit_state.initial_state.close_loop_instr) &&
-                (next_instr != tstate->interp->jit_state.initial_state.start_instr) &&
-                tstate->interp->jit_state.prev_state.code_curr_size > 5 &&
+            if ((next_instr != _tstate->jit_state.initial_state.close_loop_instr) &&
+                (next_instr != _tstate->jit_state.initial_state.start_instr) &&
+                _tstate->jit_state.prev_state.code_curr_size > 5 &&
                 // These are coroutines, and we want to unroll those usually.
                 opcode != JUMP_BACKWARD_NO_INTERRUPT) {
                 // We encountered a JUMP_BACKWARD but not to the top of our own loop.
@@ -761,7 +756,7 @@ _PyJit_translate_single_bytecode_to_trace(
                 ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
                 trace[trace_length-1].operand1 = true; // is_control_flow
                 DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr,
-                    tstate->interp->jit_state.initial_state.close_loop_instr, tstate->interp->jit_state.initial_state.start_instr);
+                    _tstate->jit_state.initial_state.close_loop_instr, _tstate->jit_state.initial_state.start_instr);
                 goto done;
             }
             break;
@@ -916,9 +911,9 @@ _PyJit_translate_single_bytecode_to_trace(
         }
     }
     // Loop back to the start
-    int is_first_instr = tstate->interp->jit_state.initial_state.close_loop_instr == next_instr ||
-        tstate->interp->jit_state.initial_state.start_instr == next_instr;
-    if (is_first_instr && tstate->interp->jit_state.prev_state.code_curr_size > 5) {
+    int is_first_instr = _tstate->jit_state.initial_state.close_loop_instr == next_instr ||
+        _tstate->jit_state.initial_state.start_instr == next_instr;
+    if (is_first_instr && _tstate->jit_state.prev_state.code_curr_size > 5) {
         if (needs_guard_ip) {
             ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)next_instr, 0);
         }
@@ -926,27 +921,27 @@ _PyJit_translate_single_bytecode_to_trace(
         goto done;
     }
     DPRINTF(2, "Trace continuing\n");
-    tstate->interp->jit_state.prev_state.code_curr_size = trace_length;
-    tstate->interp->jit_state.prev_state.code_max_size = max_length;
+    _tstate->jit_state.prev_state.code_curr_size = trace_length;
+    _tstate->jit_state.prev_state.code_max_size = max_length;
     return 1;
 done:
     DPRINTF(2, "Trace done\n");
-    tstate->interp->jit_state.prev_state.code_curr_size = trace_length;
-    tstate->interp->jit_state.prev_state.code_max_size = max_length;
+    _tstate->jit_state.prev_state.code_curr_size = trace_length;
+    _tstate->jit_state.prev_state.code_max_size = max_length;
     return 0;
 full:
     DPRINTF(2, "Trace full\n");
-    if (!is_terminator(&tstate->interp->jit_state.code_buffer[trace_length-1])) {
+    if (!is_terminator(&_tstate->jit_state.code_buffer[trace_length-1])) {
         // Undo the last few instructions.
-        trace_length = tstate->interp->jit_state.prev_state.code_curr_size;
-        max_length = tstate->interp->jit_state.prev_state.code_max_size;
+        trace_length = _tstate->jit_state.prev_state.code_curr_size;
+        max_length = _tstate->jit_state.prev_state.code_max_size;
         // We previously reversed one.
         max_length += 1;
         ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
         trace[trace_length-1].operand1 = true; // is_control_flow
     }
-    tstate->interp->jit_state.prev_state.code_curr_size = trace_length;
-    tstate->interp->jit_state.prev_state.code_max_size = max_length;
+    _tstate->jit_state.prev_state.code_curr_size = trace_length;
+    _tstate->jit_state.prev_state.code_max_size = max_length;
     return 0;
 }
 
@@ -957,14 +952,22 @@ _PyJit_TryInitializeTracing(
     _Py_CODEUNIT *start_instr, _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth,
     _PyExitData *exit, int oparg)
 {
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
     // A recursive trace.
     // Don't trace into the inner call because it will stomp on the previous trace, causing endless retraces.
-    if (tstate->interp->jit_state.prev_state.code_curr_size > 2) {
+    if (_tstate->jit_state.prev_state.code_curr_size > 2) {
         return 0;
     }
     if (oparg > 0xFFFF) {
         return 0;
     }
+    if (_tstate->jit_state.code_buffer == NULL) {
+        _tstate->jit_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
+        if (_tstate->jit_state.code_buffer == NULL) {
+            // Don't error, just go to next instruction.
+            return 0;
+        }
+    }
 
     PyCodeObject *code = _PyFrame_GetCode(frame);
 #ifdef Py_DEBUG
@@ -982,42 +985,43 @@ _PyJit_TryInitializeTracing(
         chain_depth);
 #endif
 
-    add_to_trace(tstate->interp->jit_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
-    add_to_trace(tstate->interp->jit_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0);
-    tstate->interp->jit_state.prev_state.code_curr_size = 2;
-
-    tstate->interp->jit_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH;
-    tstate->interp->jit_state.initial_state.start_instr = start_instr;
-    tstate->interp->jit_state.initial_state.close_loop_instr = close_loop_instr;
-    tstate->interp->jit_state.initial_state.code = (PyCodeObject *)Py_NewRef(code);
-    tstate->interp->jit_state.initial_state.func = (PyFunctionObject *)Py_XNewRef(PyStackRef_AsPyObjectBorrow(frame->f_funcobj));
-    tstate->interp->jit_state.initial_state.exit = exit;
-    tstate->interp->jit_state.initial_state.stack_depth = curr_stackdepth;
-    tstate->interp->jit_state.initial_state.chain_depth = chain_depth;
-    tstate->interp->jit_state.prev_state.instr_frame = frame;
-    tstate->interp->jit_state.prev_state.dependencies_still_valid = true;
-    tstate->interp->jit_state.prev_state.specialize_counter = 0;
-    tstate->interp->jit_state.prev_state.instr_code = (PyCodeObject *)Py_NewRef(_PyFrame_GetCode(frame));
-    tstate->interp->jit_state.prev_state.instr = curr_instr;
-    tstate->interp->jit_state.prev_state.instr_frame = frame;
-    tstate->interp->jit_state.prev_state.instr_oparg = oparg;
-    tstate->interp->jit_state.prev_state.instr_stacklevel = curr_stackdepth;
-    tstate->interp->jit_state.prev_state.instr_is_super = false;
+    add_to_trace(_tstate->jit_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
+    add_to_trace(_tstate->jit_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0);
+    _tstate->jit_state.prev_state.code_curr_size = 2;
+
+    _tstate->jit_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH;
+    _tstate->jit_state.initial_state.start_instr = start_instr;
+    _tstate->jit_state.initial_state.close_loop_instr = close_loop_instr;
+    _tstate->jit_state.initial_state.code = (PyCodeObject *)Py_NewRef(code);
+    _tstate->jit_state.initial_state.func = (PyFunctionObject *)Py_XNewRef(PyStackRef_AsPyObjectBorrow(frame->f_funcobj));
+    _tstate->jit_state.initial_state.exit = exit;
+    _tstate->jit_state.initial_state.stack_depth = curr_stackdepth;
+    _tstate->jit_state.initial_state.chain_depth = chain_depth;
+    _tstate->jit_state.prev_state.instr_frame = frame;
+    _tstate->jit_state.prev_state.dependencies_still_valid = true;
+    _tstate->jit_state.prev_state.specialize_counter = 0;
+    _tstate->jit_state.prev_state.instr_code = (PyCodeObject *)Py_NewRef(_PyFrame_GetCode(frame));
+    _tstate->jit_state.prev_state.instr = curr_instr;
+    _tstate->jit_state.prev_state.instr_frame = frame;
+    _tstate->jit_state.prev_state.instr_oparg = oparg;
+    _tstate->jit_state.prev_state.instr_stacklevel = curr_stackdepth;
+    _tstate->jit_state.prev_state.instr_is_super = false;
     assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (exit != NULL));
-    tstate->interp->jit_state.initial_state.jump_backward_instr = curr_instr;
+    _tstate->jit_state.initial_state.jump_backward_instr = curr_instr;
     assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (exit != NULL));
-    _Py_BloomFilter_Init(&tstate->interp->jit_state.prev_state.dependencies);
+    _Py_BloomFilter_Init(&_tstate->jit_state.prev_state.dependencies);
     return 1;
 }
 
 void
 _PyJit_FinalizeTracing(PyThreadState *tstate)
 {
-    Py_CLEAR(tstate->interp->jit_state.initial_state.code);
-    Py_CLEAR(tstate->interp->jit_state.initial_state.func);
-    Py_CLEAR(tstate->interp->jit_state.prev_state.instr_code);
-    tstate->interp->jit_state.prev_state.code_curr_size = 2;
-    tstate->interp->jit_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH - 1;
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    Py_CLEAR(_tstate->jit_state.initial_state.code);
+    Py_CLEAR(_tstate->jit_state.initial_state.func);
+    Py_CLEAR(_tstate->jit_state.prev_state.instr_code);
+    _tstate->jit_state.prev_state.code_curr_size = 2;
+    _tstate->jit_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH - 1;
 }
 
 
@@ -1327,6 +1331,7 @@ uop_optimize(
     _PyExecutorObject **exec_ptr,
     bool progress_needed)
 {
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
     // Note: the executor has a slightly different set of dependencies than the tracer.
     // For example: the tracer depends on function and code objects.
     // The executor may only depend on the code object.
@@ -1337,17 +1342,16 @@ uop_optimize(
     // It is the optimizer's responsibility to add the dependencies it requires on its own.
     _PyBloomFilter new_dependencies;
     _Py_BloomFilter_Init(&new_dependencies);
-    _Py_BloomFilter_Add(&new_dependencies, tstate->interp->jit_state.initial_state.code);
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    _PyUOpInstruction *buffer = interp->jit_state.code_buffer;
+    _Py_BloomFilter_Add(&new_dependencies, _tstate->jit_state.initial_state.code);
+    _PyUOpInstruction *buffer = _tstate->jit_state.code_buffer;
     OPT_STAT_INC(attempts);
     char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
     bool is_noopt = true;
     if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
         is_noopt = false;
     }
-    int curr_stackentries = tstate->interp->jit_state.initial_state.stack_depth;
-    int length = interp->jit_state.prev_state.code_curr_size;
+    int curr_stackentries = _tstate->jit_state.initial_state.stack_depth;
+    int length = _tstate->jit_state.prev_state.code_curr_size;
     // Trace too short, don't bother.
     if (length <= 5) {
         return 0;
@@ -1356,7 +1360,7 @@ uop_optimize(
     assert(length < UOP_MAX_TRACE_LENGTH);
     OPT_STAT_INC(traces_created);
     if (!is_noopt) {
-        length = _Py_uop_analyze_and_optimize(tstate->interp->jit_state.initial_state.func, buffer,
+        length = _Py_uop_analyze_and_optimize(_tstate->jit_state.initial_state.func, buffer,
                                            length,
                                            curr_stackentries, &new_dependencies);
         if (length <= 0) {
@@ -1382,7 +1386,7 @@ uop_optimize(
     length = prepare_for_execution(buffer, length);
     assert(length <= UOP_MAX_TRACE_LENGTH);
     _PyExecutorObject *executor = make_executor_from_uops(
-        buffer, length, &new_dependencies, tstate->interp->jit_state.initial_state.chain_depth);
+        buffer, length, &new_dependencies, _tstate->jit_state.initial_state.chain_depth);
     if (executor == NULL) {
         return -1;
     }
@@ -1724,10 +1728,10 @@ _PyJit_Tracer_InvalidateDependency(PyThreadState *tstate, void *obj)
     _PyBloomFilter obj_filter;
     _Py_BloomFilter_Init(&obj_filter);
     _Py_BloomFilter_Add(&obj_filter, obj);
-
-    if (bloom_filter_may_contain(&tstate->interp->jit_state.prev_state.dependencies, &obj_filter))
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    if (bloom_filter_may_contain(&_tstate->jit_state.prev_state.dependencies, &obj_filter))
     {
-        tstate->interp->jit_state.prev_state.dependencies_still_valid = false;
+        _tstate->jit_state.prev_state.dependencies_still_valid = false;
     }
 }
 /* Invalidate all executors */
index 36d62ecae2ee0bb5812863588cf53de0245d7175..a6a54c2f94a7dccd2ed4e6f0032e04d850360193 100644 (file)
@@ -545,9 +545,6 @@ init_interpreter(PyInterpreterState *interp,
     _Py_brc_init_state(interp);
 #endif
 
-#ifdef _Py_TIER2
-    interp->jit_state.code_buffer = NULL;
-#endif
     llist_init(&interp->mem_free_queue.head);
     llist_init(&interp->asyncio_tasks_head);
     interp->asyncio_tasks_lock = (PyMutex){0};
@@ -797,10 +794,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
 
 #ifdef _Py_TIER2
     _Py_ClearExecutorDeletionList(interp);
-    if (interp->jit_state.code_buffer != NULL) {
-        _PyObject_VirtualFree(interp->jit_state.code_buffer, UOP_BUFFER_SIZE);
-        interp->jit_state.code_buffer = NULL;
-    }
 #endif
     _PyAST_Fini(interp);
     _PyAtExit_Fini(interp);
@@ -1495,6 +1488,9 @@ init_threadstate(_PyThreadStateImpl *_tstate,
     _tstate->asyncio_running_loop = NULL;
     _tstate->asyncio_running_task = NULL;
 
+#ifdef _Py_TIER2
+    _tstate->jit_state.code_buffer = NULL;
+#endif
     tstate->delete_later = NULL;
 
     llist_init(&_tstate->mem_free_queue);
@@ -1794,6 +1790,14 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
     assert(tstate_impl->refcounts.values == NULL);
 #endif
 
+#if _Py_TIER2
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    if (_tstate->jit_state.code_buffer != NULL) {
+        _PyObject_VirtualFree(_tstate->jit_state.code_buffer, UOP_BUFFER_SIZE);
+        _tstate->jit_state.code_buffer = NULL;
+    }
+#endif
+
     HEAD_UNLOCK(runtime);
 
     // XXX Unbind in PyThreadState_Clear(), or earlier
index 4ede31992a7dfb32c968b64742e2ae0a034bcf8d..7e4f1bd5c88c865b00e19ac9e6493f0b9ef31500 100644 (file)
@@ -162,7 +162,7 @@ class Emitter:
         assert "specializing" in uop.annotations, uop.name
         self.out.start_line()
         self.emit("#if _Py_TIER2\n")
-        self.emit("tstate->interp->jit_state.prev_state.specialize_counter++;\n")
+        self.emit("((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;\n")
         self.emit("#endif\n")
         self.emit(tkn)
         emit_to(self.out, tkn_iter, "SEMI")