From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
Date: Fri, 7 Nov 2025 22:02:00 +0000 (+0000)
Subject: Move to thread state
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=aaf68732f8a3f67ff94dad51a52b393870d6dfac;p=thirdparty%2FPython%2Fcpython.git

Move to thread state
---

diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index a2b5afa8736c..39023923c7e2 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -14,8 +14,6 @@ extern "C" {
 #include "pycore_structs.h"       // PyHamtObject
 #include "pycore_tstate.h"        // _PyThreadStateImpl
 #include "pycore_typedefs.h"      // _PyRuntimeState
-#include "pycore_uop.h"           // struct _PyUOpInstruction
-
 
 #define CODE_MAX_WATCHERS 8
 #define CONTEXT_MAX_WATCHERS 8
@@ -757,36 +755,6 @@ struct _Py_unique_id_pool {
 typedef _Py_CODEUNIT *(*_PyJitEntryFuncPtr)(struct _PyExecutorObject *exec,
     _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate);
 
-typedef struct _PyJitTracerInitialState {
-    int stack_depth;
-    int chain_depth;
-    struct _PyExitData *exit;
-    PyCodeObject *code; // Strong
-    PyFunctionObject *func; // Strong
-    _Py_CODEUNIT *start_instr;
-    _Py_CODEUNIT *close_loop_instr;
-    _Py_CODEUNIT *jump_backward_instr;
-} _PyJitTracerInitialState;
-
-typedef struct _PyJitTracerPreviousState {
-    bool dependencies_still_valid;
-    bool instr_is_super;
-    int code_max_size;
-    int code_curr_size;
-    int instr_oparg;
-    int instr_stacklevel;
-    int specialize_counter;
-    _Py_CODEUNIT *instr;
-    PyCodeObject *instr_code; // Strong
-    _PyInterpreterFrame *instr_frame;
-    _PyBloomFilter dependencies;
-} _PyJitTracerPreviousState;
-
-typedef struct _PyJitTracerState {
-    _PyUOpInstruction *code_buffer;
-    _PyJitTracerInitialState initial_state;
-    _PyJitTracerPreviousState prev_state;
-} _PyJitTracerState;
 
 /* PyInterpreterState holds the global state for one of the runtime's
    interpreters.  Typically the initial (main) interpreter is the only one.
@@ -963,7 +931,6 @@ struct _is {
     struct types_state types;
     struct callable_cache callable_cache;
     PyObject *common_consts[NUM_COMMON_CONSTANTS];
-    _PyJitTracerState jit_state;
     bool jit;
     bool compiling;
     struct _PyExecutorObject *executor_list_head;
diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h
index bad968428c73..04041b273d75 100644
--- a/Include/internal/pycore_tstate.h
+++ b/Include/internal/pycore_tstate.h
@@ -12,7 +12,8 @@ extern "C" {
 #include "pycore_freelist_state.h"   // struct _Py_freelists
 #include "pycore_mimalloc.h"         // struct _mimalloc_thread_state
 #include "pycore_qsbr.h"             // struct qsbr
-
+#include "pycore_uop.h"              // struct _PyUOpInstruction
+#include "pycore_structs.h"
 
 #ifdef Py_GIL_DISABLED
 struct _gc_thread_state {
@@ -21,6 +22,39 @@ struct _gc_thread_state {
 };
 #endif
 
+#if _Py_TIER2
+typedef struct _PyJitTracerInitialState {
+    int stack_depth;
+    int chain_depth;
+    struct _PyExitData *exit;
+    PyCodeObject *code; // Strong
+    PyFunctionObject *func; // Strong
+    _Py_CODEUNIT *start_instr;
+    _Py_CODEUNIT *close_loop_instr;
+    _Py_CODEUNIT *jump_backward_instr;
+} _PyJitTracerInitialState;
+
+typedef struct _PyJitTracerPreviousState {
+    bool dependencies_still_valid;
+    bool instr_is_super;
+    int code_max_size;
+    int code_curr_size;
+    int instr_oparg;
+    int instr_stacklevel;
+    int specialize_counter;
+    _Py_CODEUNIT *instr;
+    PyCodeObject *instr_code; // Strong
+    struct _PyInterpreterFrame *instr_frame;
+    _PyBloomFilter dependencies;
+} _PyJitTracerPreviousState;
+
+typedef struct _PyJitTracerState {
+    _PyUOpInstruction *code_buffer;
+    _PyJitTracerInitialState initial_state;
+    _PyJitTracerPreviousState prev_state;
+} _PyJitTracerState;
+#endif
+
 // Every PyThreadState is actually allocated as a _PyThreadStateImpl.  The
 // PyThreadState fields are exposed as part of the C API, although most fields
 // are intended to be private. The _PyThreadStateImpl fields not exposed.
@@ -75,7 +109,9 @@ typedef struct _PyThreadStateImpl {
 #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
     Py_ssize_t reftotal;  // this thread's total refcount operations
 #endif
-
+#if _Py_TIER2
+    _PyJitTracerState jit_state;
+#endif
 } _PyThreadStateImpl;
 
 #ifdef __cplusplus
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 42fb4170eae7..283a18424715 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2970,13 +2970,6 @@ dummy_func(
             if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) &&
                 this_instr->op.code == JUMP_BACKWARD_JIT &&
                 next_instr->op.code != ENTER_EXECUTOR) {
-                if (tstate->interp->jit_state.code_buffer == NULL) {
-                    tstate->interp->jit_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
-                    if (tstate->interp->jit_state.code_buffer == NULL) {
-                        // Don't error, just go to next instruction.
-                        DISPATCH();
-                    }
-                }
                 /* Back up over EXTENDED_ARGs so executor is inserted at the correct place */
                 _Py_CODEUNIT *insert_exec_at = this_instr;
                 while (oparg > 255) {
@@ -5673,24 +5666,25 @@ dummy_func(
         }
         // Super instructions. Instruction deopted. There's a mismatch in what the stack expects
        // in the optimizer. So we have to reflect in the trace correctly.
-        if ((tstate->interp->jit_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
+        _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+        if ((_tstate->jit_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
             opcode == POP_TOP) ||
-            (tstate->interp->jit_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
+            (_tstate->jit_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
             opcode == STORE_FAST)) {
-            tstate->interp->jit_state.prev_state.instr_is_super = true;
+            _tstate->jit_state.prev_state.instr_is_super = true;
         }
         else {
-            tstate->interp->jit_state.prev_state.instr = next_instr;
+            _tstate->jit_state.prev_state.instr = next_instr;
         }
-        tstate->interp->jit_state.prev_state.specialize_counter = 0;
+        _tstate->jit_state.prev_state.specialize_counter = 0;
         PyCodeObject *prev_code = (PyCodeObject *)Py_NewRef(PyStackRef_AsPyObjectBorrow(frame->f_executable));
-        if (tstate->interp->jit_state.prev_state.instr_code != prev_code) {
-            Py_SETREF(tstate->interp->jit_state.prev_state.instr_code, prev_code);
+        if (_tstate->jit_state.prev_state.instr_code != prev_code) {
+            Py_SETREF(_tstate->jit_state.prev_state.instr_code, prev_code);
         }
-        tstate->interp->jit_state.prev_state.instr_frame = frame;
-        tstate->interp->jit_state.prev_state.instr_oparg = oparg;
-        tstate->interp->jit_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
+        _tstate->jit_state.prev_state.instr_frame = frame;
+        _tstate->jit_state.prev_state.instr_oparg = oparg;
+        _tstate->jit_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
         DISPATCH_GOTO_NON_TRACING();
 #else
         Py_FatalError("JIT label executed in non-jit build.");
diff --git a/Python/ceval.c b/Python/ceval.c
index ce6493fdd852..e6424503e889 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -1000,16 +1000,17 @@ bail_tracing_and_jit(PyThreadState *tstate, _PyInterpreterFrame *frame)
     if (!_PyErr_Occurred(tstate) && !_is_sys_tracing) {
         err = _PyOptimizer_Optimize(frame, tstate);
     }
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
     // Deal with backoffs
-    _PyExitData *exit = tstate->interp->jit_state.initial_state.exit;
+    _PyExitData *exit = _tstate->jit_state.initial_state.exit;
     if (exit == NULL) {
         // We hold a strong reference to the code object, so the instruction won't be freed.
         if (err <= 0) {
-            _Py_BackoffCounter counter = tstate->interp->jit_state.initial_state.jump_backward_instr[1].counter;
-            tstate->interp->jit_state.initial_state.jump_backward_instr[1].counter = restart_backoff_counter(counter);
+            _Py_BackoffCounter counter = _tstate->jit_state.initial_state.jump_backward_instr[1].counter;
+            _tstate->jit_state.initial_state.jump_backward_instr[1].counter = restart_backoff_counter(counter);
         }
         else {
-            tstate->interp->jit_state.initial_state.jump_backward_instr[1].counter = initial_jump_backoff_counter();
+            _tstate->jit_state.initial_state.jump_backward_instr[1].counter = initial_jump_backoff_counter();
         }
     }
     else {
@@ -1017,7 +1018,7 @@ bail_tracing_and_jit(PyThreadState *tstate, _PyInterpreterFrame *frame)
         // to be valid to access.
         if (err <= 0) {
             // Some opcodes will forever be unchanged. Don't ever bother specializing for them ever again.
-            if (tstate->interp->jit_state.prev_state.instr->op.code == INTERPRETER_EXIT) {
+            if (_tstate->jit_state.prev_state.instr->op.code == INTERPRETER_EXIT) {
                 exit->temperature = initial_unreachable_backoff_counter();
             }
             else {
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index 219874ac240b..c1062e9d091d 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -134,7 +134,7 @@
 
 #if (_Py_TAIL_CALL_INTERP || USE_COMPUTED_GOTOS) && _Py_TIER2
 #   define IS_JIT_TRACING() (DISPATCH_TABLE_VAR == TRACING_DISPATCH_TABLE)
-#   define IS_JIT_TRACING_MAKING_PROGRESS() (IS_JIT_TRACING() && tstate->interp->jit_state.prev_state.specialize_counter < MAX_SPECIALIZATION_TRIES)
+#   define IS_JIT_TRACING_MAKING_PROGRESS() (IS_JIT_TRACING() && ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter < MAX_SPECIALIZATION_TRIES)
 #   define ENTER_TRACING() \
         DISPATCH_TABLE_VAR = TRACING_DISPATCH_TABLE;
 #   define LEAVE_TRACING() \
@@ -402,7 +402,7 @@ do { \
             JUMP_TO_LABEL(error); \
         } \
         if (keep_tracing_bit) { \
-            assert(tstate->interp->jit_state.prev_state.code_curr_size == 2); \
+            assert(((_PyThreadStateImpl *)tstate)->jit_state.prev_state.code_curr_size == 2); \
             ENTER_TRACING(); \
             DISPATCH_NON_TRACING(); \
         } \
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 24c4271c88fa..a45138e6e6c5 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -45,7 +45,7 @@
                 _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -1539,7 +1539,7 @@
                 _Py_Specialize_Call(callable, next_instr, oparg + !PyStackRef_IsNull(self_or_null));
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -2832,7 +2832,7 @@
                 _Py_Specialize_CallKw(callable, next_instr, oparg + !PyStackRef_IsNull(self_or_null));
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -4678,7 +4678,7 @@
                 _Py_Specialize_CompareOp(left, right, next_instr, oparg);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -4926,7 +4926,7 @@
                 _Py_Specialize_ContainsOp(right, next_instr);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -5665,7 +5665,7 @@
                 _Py_Specialize_ForIter(iter, null_or_index, next_instr, oparg);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -7647,7 +7647,7 @@
             FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
             next_instr = this_instr;
             #if _Py_TIER2
-            tstate->interp->jit_state.prev_state.specialize_counter++;
+            ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
             #endif
             DISPATCH_SAME_OPARG();
         }
@@ -7703,11 +7703,12 @@
             if (!IS_JIT_TRACING() && backoff_counter_triggers(counter) &&
                 this_instr->op.code == JUMP_BACKWARD_JIT &&
                 next_instr->op.code != ENTER_EXECUTOR) {
-                if (tstate->interp->jit_state.code_buffer == NULL) {
+                _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+                if (_tstate->jit_state.code_buffer == NULL) {
                     _PyFrame_SetStackPointer(frame, stack_pointer);
-                    tstate->interp->jit_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
+                    _tstate->jit_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
                     stack_pointer = _PyFrame_GetStackPointer(frame);
-                    if (tstate->interp->jit_state.code_buffer == NULL) {
+                    if (_tstate->jit_state.code_buffer == NULL) {
                         DISPATCH();
                     }
                 }
@@ -7880,7 +7881,7 @@
                 _Py_Specialize_LoadAttr(owner, next_instr, name);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -9188,7 +9189,7 @@
                 _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -9511,7 +9512,7 @@
                 _Py_Specialize_LoadSuperAttr(global_super_st, class_st, next_instr, load_method);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -10499,7 +10500,7 @@
                 _Py_Specialize_Send(receiver, next_instr);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -10801,7 +10802,7 @@
                 _Py_Specialize_StoreAttr(owner, next_instr, name);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -11301,7 +11302,7 @@
                 _Py_Specialize_StoreSubscr(container, sub, next_instr);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -11513,7 +11514,7 @@
                 _Py_Specialize_ToBool(value, next_instr);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -11896,7 +11897,7 @@
                 _Py_Specialize_UnpackSequence(seq, next_instr, oparg);
                 stack_pointer = _PyFrame_GetStackPointer(frame);
                 #if _Py_TIER2
-                tstate->interp->jit_state.prev_state.specialize_counter++;
+                ((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;
                 #endif
                 DISPATCH_SAME_OPARG();
             }
@@ -12360,25 +12361,26 @@
             JUMP_TO_LABEL(error);
         }
         DISPATCH_GOTO_NON_TRACING();
     }
-    if ((tstate->interp->jit_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    if ((_tstate->jit_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
         opcode == POP_TOP) ||
-        (tstate->interp->jit_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
+        (_tstate->jit_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
         opcode == STORE_FAST)) {
-        tstate->interp->jit_state.prev_state.instr_is_super = true;
+        _tstate->jit_state.prev_state.instr_is_super = true;
     }
     else {
-        tstate->interp->jit_state.prev_state.instr = next_instr;
+        _tstate->jit_state.prev_state.instr = next_instr;
    }
-    tstate->interp->jit_state.prev_state.specialize_counter = 0;
+    _tstate->jit_state.prev_state.specialize_counter = 0;
     PyCodeObject *prev_code = (PyCodeObject *)Py_NewRef(PyStackRef_AsPyObjectBorrow(frame->f_executable));
-    if (tstate->interp->jit_state.prev_state.instr_code != prev_code) {
+    if (_tstate->jit_state.prev_state.instr_code != prev_code) {
         _PyFrame_SetStackPointer(frame, stack_pointer);
-        Py_SETREF(tstate->interp->jit_state.prev_state.instr_code, prev_code);
+        Py_SETREF(_tstate->jit_state.prev_state.instr_code, prev_code);
         stack_pointer = _PyFrame_GetStackPointer(frame);
     }
-    tstate->interp->jit_state.prev_state.instr_frame = frame;
-    tstate->interp->jit_state.prev_state.instr_oparg = oparg;
-    tstate->interp->jit_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
+    _tstate->jit_state.prev_state.instr_frame = frame;
+    _tstate->jit_state.prev_state.instr_oparg = oparg;
+    _tstate->jit_state.prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
     DISPATCH_GOTO_NON_TRACING();
 #else
     Py_FatalError("JIT label executed in non-jit build.");
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 1e858ca3689a..c38f28e224a8 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -118,14 +118,15 @@ Py_NO_INLINE int
 _PyOptimizer_Optimize(
     _PyInterpreterFrame *frame, PyThreadState *tstate)
 {
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    int chain_depth = _tstate->jit_state.initial_state.chain_depth;
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    int chain_depth = tstate->interp->jit_state.initial_state.chain_depth;
     assert(interp->jit);
     assert(!interp->compiling);
-    assert(tstate->interp->jit_state.initial_state.stack_depth >= 0);
+    assert(_tstate->jit_state.initial_state.stack_depth >= 0);
 #ifndef Py_GIL_DISABLED
     // Trace got stomped on by another thread.
-    if (tstate->interp->jit_state.initial_state.func == NULL) {
+    if (_tstate->jit_state.initial_state.func == NULL) {
         return 0;
     }
     interp->compiling = true;
@@ -135,14 +136,14 @@ _PyOptimizer_Optimize(
     // this is true, since a deopt won't infinitely re-enter the executor:
     chain_depth %= MAX_CHAIN_DEPTH;
     bool progress_needed = chain_depth == 0;
-    PyCodeObject *code = (PyCodeObject *)tstate->interp->jit_state.initial_state.code;
-    _Py_CODEUNIT *start = tstate->interp->jit_state.initial_state.start_instr;
+    PyCodeObject *code = (PyCodeObject *)_tstate->jit_state.initial_state.code;
+    _Py_CODEUNIT *start = _tstate->jit_state.initial_state.start_instr;
     if (progress_needed && !has_space_for_executor(code, start)) {
         interp->compiling = false;
         return 0;
     }
     // One of our dependencies while tracing was invalidated. Not worth compiling.
-    if (!tstate->interp->jit_state.prev_state.dependencies_still_valid) {
+    if (!_tstate->jit_state.prev_state.dependencies_still_valid) {
         interp->compiling = false;
         return 0;
     }
@@ -171,7 +172,7 @@ _PyOptimizer_Optimize(
     else {
         executor->vm_data.code = NULL;
     }
-    _PyExitData *exit = tstate->interp->jit_state.initial_state.exit;
+    _PyExitData *exit = _tstate->jit_state.initial_state.exit;
     if (exit != NULL) {
         exit->executor = executor;
     }
@@ -561,27 +562,21 @@ _PyJit_translate_single_bytecode_to_trace(
         lltrace = *python_lltrace - '0';  // TODO: Parse an int and all that
     }
 #endif
-
-    PyCodeObject *old_code = tstate->interp->jit_state.prev_state.instr_code;
-    // Something else finalized the trace. This can happen in multi-threaded scenarios as our trace
-    // addition from bytecode execution to here is not atomic.
-    // Though in GIL builds, the GIL protects the rest.
-    if (old_code == NULL) {
-        return 0;
-    }
-    bool progress_needed = (tstate->interp->jit_state.initial_state.chain_depth % MAX_CHAIN_DEPTH) == 0;
-    _PyBloomFilter *dependencies = &tstate->interp->jit_state.prev_state.dependencies;
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    PyCodeObject *old_code = _tstate->jit_state.prev_state.instr_code;
+    bool progress_needed = (_tstate->jit_state.initial_state.chain_depth % MAX_CHAIN_DEPTH) == 0;
+    _PyBloomFilter *dependencies = &_tstate->jit_state.prev_state.dependencies;
     _Py_BloomFilter_Add(dependencies, old_code);
-    int trace_length = tstate->interp->jit_state.prev_state.code_curr_size;
-    _PyUOpInstruction *trace = tstate->interp->jit_state.code_buffer;
-    int max_length = tstate->interp->jit_state.prev_state.code_max_size;
+    int trace_length = _tstate->jit_state.prev_state.code_curr_size;
+    _PyUOpInstruction *trace = _tstate->jit_state.code_buffer;
+    int max_length = _tstate->jit_state.prev_state.code_max_size;
     int is_sys_tracing = (tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL);
     if (is_sys_tracing) {
         goto full;
     }
-    _Py_CODEUNIT *this_instr = tstate->interp->jit_state.prev_state.instr;
+    _Py_CODEUNIT *this_instr = _tstate->jit_state.prev_state.instr;
     _Py_CODEUNIT *target_instr = this_instr;
     uint32_t target = 0;
@@ -591,10 +586,10 @@ _PyJit_translate_single_bytecode_to_trace(
 
     // Rewind EXTENDED_ARG so that we see the whole thing.
     // We must point to the first EXTENDED_ARG when deopting.
-    int oparg = tstate->interp->jit_state.prev_state.instr_oparg;
+    int oparg = _tstate->jit_state.prev_state.instr_oparg;
     int opcode = this_instr->op.code;
 
     // Failed specialization many times. Deopt!
-    if (tstate->interp->jit_state.prev_state.specialize_counter >= MAX_SPECIALIZATION_TRIES) {
+    if (_tstate->jit_state.prev_state.specialize_counter >= MAX_SPECIALIZATION_TRIES) {
         opcode = _PyOpcode_Deopt[opcode];
     }
     int rewind_oparg = oparg;
@@ -603,7 +598,7 @@ _PyJit_translate_single_bytecode_to_trace(
         target--;
     }
 
-    int old_stack_level = tstate->interp->jit_state.prev_state.instr_stacklevel;
+    int old_stack_level = _tstate->jit_state.prev_state.instr_stacklevel;
 
     // Strange control-flow
     bool has_dynamic_jump_taken = OPCODE_HAS_UNPREDICTABLE_JUMP(opcode) &&
@@ -611,7 +606,7 @@ _PyJit_translate_single_bytecode_to_trace(
 
     /* Special case the first instruction,
      * so that we can guarantee forward progress */
-    if (progress_needed && tstate->interp->jit_state.prev_state.code_curr_size <= 3) {
+    if (progress_needed && _tstate->jit_state.prev_state.code_curr_size <= 3) {
         if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
             opcode = _PyOpcode_Deopt[opcode];
         }
@@ -633,8 +628,8 @@ _PyJit_translate_single_bytecode_to_trace(
 #endif
 
     // Skip over super instructions.
-    if (tstate->interp->jit_state.prev_state.instr_is_super) {
-        tstate->interp->jit_state.prev_state.instr_is_super = false;
+    if (_tstate->jit_state.prev_state.instr_is_super) {
+        _tstate->jit_state.prev_state.instr_is_super = false;
         return 1;
     }
 
@@ -642,13 +637,13 @@ _PyJit_translate_single_bytecode_to_trace(
         goto full;
     }
 
-    if (!tstate->interp->jit_state.prev_state.dependencies_still_valid) {
+    if (!_tstate->jit_state.prev_state.dependencies_still_valid) {
         goto done;
     }
 
     // This happens when a recursive call happens that we can't trace. Such as Python -> C -> Python calls
     // If we haven't guarded the IP, then it's untraceable.
-    if (frame != tstate->interp->jit_state.prev_state.instr_frame && !needs_guard_ip) {
+    if (frame != _tstate->jit_state.prev_state.instr_frame && !needs_guard_ip) {
         DPRINTF(2, "Unsupported: unguardable jump taken\n");
         goto unsupported;
     }
@@ -748,9 +743,9 @@ _PyJit_translate_single_bytecode_to_trace(
             _Py_FALLTHROUGH;
         case JUMP_BACKWARD_NO_INTERRUPT:
         {
-            if ((next_instr != tstate->interp->jit_state.initial_state.close_loop_instr) &&
-                (next_instr != tstate->interp->jit_state.initial_state.start_instr) &&
-                tstate->interp->jit_state.prev_state.code_curr_size > 5 &&
+            if ((next_instr != _tstate->jit_state.initial_state.close_loop_instr) &&
+                (next_instr != _tstate->jit_state.initial_state.start_instr) &&
+                _tstate->jit_state.prev_state.code_curr_size > 5 &&
                 // These are coroutines, and we want to unroll those usually.
                 opcode != JUMP_BACKWARD_NO_INTERRUPT) {
                 // We encountered a JUMP_BACKWARD but not to the top of our own loop.
@@ -761,7 +756,7 @@ _PyJit_translate_single_bytecode_to_trace(
                 ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
                 trace[trace_length-1].operand1 = true; // is_control_flow
                 DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr,
-                    tstate->interp->jit_state.initial_state.close_loop_instr, tstate->interp->jit_state.initial_state.start_instr);
+                    _tstate->jit_state.initial_state.close_loop_instr, _tstate->jit_state.initial_state.start_instr);
                 goto done;
             }
             break;
@@ -916,9 +911,9 @@ _PyJit_translate_single_bytecode_to_trace(
         }
     }
     // Loop back to the start
-    int is_first_instr = tstate->interp->jit_state.initial_state.close_loop_instr == next_instr ||
-        tstate->interp->jit_state.initial_state.start_instr == next_instr;
-    if (is_first_instr && tstate->interp->jit_state.prev_state.code_curr_size > 5) {
+    int is_first_instr = _tstate->jit_state.initial_state.close_loop_instr == next_instr ||
+        _tstate->jit_state.initial_state.start_instr == next_instr;
+    if (is_first_instr && _tstate->jit_state.prev_state.code_curr_size > 5) {
         if (needs_guard_ip) {
             ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)next_instr, 0);
         }
@@ -926,27 +921,27 @@ _PyJit_translate_single_bytecode_to_trace(
         goto done;
     }
     DPRINTF(2, "Trace continuing\n");
-    tstate->interp->jit_state.prev_state.code_curr_size = trace_length;
-    tstate->interp->jit_state.prev_state.code_max_size = max_length;
+    _tstate->jit_state.prev_state.code_curr_size = trace_length;
+    _tstate->jit_state.prev_state.code_max_size = max_length;
     return 1;
 
 done:
     DPRINTF(2, "Trace done\n");
-    tstate->interp->jit_state.prev_state.code_curr_size = trace_length;
-    tstate->interp->jit_state.prev_state.code_max_size = max_length;
+    _tstate->jit_state.prev_state.code_curr_size = trace_length;
+    _tstate->jit_state.prev_state.code_max_size = max_length;
     return 0;
 
 full:
     DPRINTF(2, "Trace full\n");
-    if (!is_terminator(&tstate->interp->jit_state.code_buffer[trace_length-1])) {
+    if (!is_terminator(&_tstate->jit_state.code_buffer[trace_length-1])) {
         // Undo the last few instructions.
-        trace_length = tstate->interp->jit_state.prev_state.code_curr_size;
-        max_length = tstate->interp->jit_state.prev_state.code_max_size;
+        trace_length = _tstate->jit_state.prev_state.code_curr_size;
+        max_length = _tstate->jit_state.prev_state.code_max_size;
         // We previously reversed one.
         max_length += 1;
         ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
         trace[trace_length-1].operand1 = true; // is_control_flow
     }
-    tstate->interp->jit_state.prev_state.code_curr_size = trace_length;
-    tstate->interp->jit_state.prev_state.code_max_size = max_length;
+    _tstate->jit_state.prev_state.code_curr_size = trace_length;
+    _tstate->jit_state.prev_state.code_max_size = max_length;
     return 0;
 }
 
@@ -957,14 +952,22 @@ _PyJit_TryInitializeTracing(
     _Py_CODEUNIT *start_instr, _Py_CODEUNIT *close_loop_instr,
     int curr_stackdepth, int chain_depth, _PyExitData *exit, int oparg)
 {
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
     // A recursive trace.
     // Don't trace into the inner call because it will stomp on the previous trace, causing endless retraces.
-    if (tstate->interp->jit_state.prev_state.code_curr_size > 2) {
+    if (_tstate->jit_state.prev_state.code_curr_size > 2) {
         return 0;
     }
     if (oparg > 0xFFFF) {
         return 0;
     }
+    if (_tstate->jit_state.code_buffer == NULL) {
+        _tstate->jit_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
+        if (_tstate->jit_state.code_buffer == NULL) {
+            // Don't error, just go to next instruction.
+            return 0;
+        }
+    }
 
     PyCodeObject *code = _PyFrame_GetCode(frame);
 
 #ifdef Py_DEBUG
@@ -982,42 +985,43 @@ _PyJit_TryInitializeTracing(
         chain_depth);
 #endif
 
-    add_to_trace(tstate->interp->jit_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
-    add_to_trace(tstate->interp->jit_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0);
-    tstate->interp->jit_state.prev_state.code_curr_size = 2;
-
-    tstate->interp->jit_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH;
-    tstate->interp->jit_state.initial_state.start_instr = start_instr;
-    tstate->interp->jit_state.initial_state.close_loop_instr = close_loop_instr;
-    tstate->interp->jit_state.initial_state.code = (PyCodeObject *)Py_NewRef(code);
-    tstate->interp->jit_state.initial_state.func = (PyFunctionObject *)Py_XNewRef(PyStackRef_AsPyObjectBorrow(frame->f_funcobj));
-    tstate->interp->jit_state.initial_state.exit = exit;
-    tstate->interp->jit_state.initial_state.stack_depth = curr_stackdepth;
-    tstate->interp->jit_state.initial_state.chain_depth = chain_depth;
-    tstate->interp->jit_state.prev_state.instr_frame = frame;
-    tstate->interp->jit_state.prev_state.dependencies_still_valid = true;
-    tstate->interp->jit_state.prev_state.specialize_counter = 0;
-    tstate->interp->jit_state.prev_state.instr_code = (PyCodeObject *)Py_NewRef(_PyFrame_GetCode(frame));
-    tstate->interp->jit_state.prev_state.instr = curr_instr;
-    tstate->interp->jit_state.prev_state.instr_frame = frame;
-    tstate->interp->jit_state.prev_state.instr_oparg = oparg;
-    tstate->interp->jit_state.prev_state.instr_stacklevel = curr_stackdepth;
-    tstate->interp->jit_state.prev_state.instr_is_super = false;
+    add_to_trace(_tstate->jit_state.code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
+    add_to_trace(_tstate->jit_state.code_buffer, 1, _MAKE_WARM, 0, 0, 0);
+    _tstate->jit_state.prev_state.code_curr_size = 2;
+
+    _tstate->jit_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH;
+    _tstate->jit_state.initial_state.start_instr = start_instr;
+    _tstate->jit_state.initial_state.close_loop_instr = close_loop_instr;
+    _tstate->jit_state.initial_state.code = (PyCodeObject *)Py_NewRef(code);
+    _tstate->jit_state.initial_state.func = (PyFunctionObject *)Py_XNewRef(PyStackRef_AsPyObjectBorrow(frame->f_funcobj));
+    _tstate->jit_state.initial_state.exit = exit;
+    _tstate->jit_state.initial_state.stack_depth = curr_stackdepth;
+    _tstate->jit_state.initial_state.chain_depth = chain_depth;
+    _tstate->jit_state.prev_state.instr_frame = frame;
+    _tstate->jit_state.prev_state.dependencies_still_valid = true;
+    _tstate->jit_state.prev_state.specialize_counter = 0;
+    _tstate->jit_state.prev_state.instr_code = (PyCodeObject *)Py_NewRef(_PyFrame_GetCode(frame));
+    _tstate->jit_state.prev_state.instr = curr_instr;
+    _tstate->jit_state.prev_state.instr_frame = frame;
+    _tstate->jit_state.prev_state.instr_oparg = oparg;
+    _tstate->jit_state.prev_state.instr_stacklevel = curr_stackdepth;
+    _tstate->jit_state.prev_state.instr_is_super = false;
     assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (exit != NULL));
-    tstate->interp->jit_state.initial_state.jump_backward_instr = curr_instr;
+    _tstate->jit_state.initial_state.jump_backward_instr = curr_instr;
     assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (exit != NULL));
-    _Py_BloomFilter_Init(&tstate->interp->jit_state.prev_state.dependencies);
+    _Py_BloomFilter_Init(&_tstate->jit_state.prev_state.dependencies);
     return 1;
 }
 
 void
 _PyJit_FinalizeTracing(PyThreadState *tstate)
 {
-    Py_CLEAR(tstate->interp->jit_state.initial_state.code);
-    Py_CLEAR(tstate->interp->jit_state.initial_state.func);
-    Py_CLEAR(tstate->interp->jit_state.prev_state.instr_code);
-    tstate->interp->jit_state.prev_state.code_curr_size = 2;
-    tstate->interp->jit_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH - 1;
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    Py_CLEAR(_tstate->jit_state.initial_state.code);
+    Py_CLEAR(_tstate->jit_state.initial_state.func);
+    Py_CLEAR(_tstate->jit_state.prev_state.instr_code);
+    _tstate->jit_state.prev_state.code_curr_size = 2;
+    _tstate->jit_state.prev_state.code_max_size = UOP_MAX_TRACE_LENGTH - 1;
 }
@@ -1327,6 +1331,7 @@ uop_optimize(
     _PyExecutorObject **exec_ptr,
     bool progress_needed)
 {
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
     // Note: the executor has a slightly different set of dependencies than the tracer.
     // For example: the tracer depends on function and code objects.
     // The executor may only depend on the code object.
@@ -1337,17 +1342,16 @@ uop_optimize(
     // It is the optimizer's responsibility to add the dependencies it requires on its own.
     _PyBloomFilter new_dependencies;
     _Py_BloomFilter_Init(&new_dependencies);
-    _Py_BloomFilter_Add(&new_dependencies, tstate->interp->jit_state.initial_state.code);
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    _PyUOpInstruction *buffer = interp->jit_state.code_buffer;
+    _Py_BloomFilter_Add(&new_dependencies, _tstate->jit_state.initial_state.code);
+    _PyUOpInstruction *buffer = _tstate->jit_state.code_buffer;
     OPT_STAT_INC(attempts);
     char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
     bool is_noopt = true;
     if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
         is_noopt = false;
     }
-    int curr_stackentries = tstate->interp->jit_state.initial_state.stack_depth;
-    int length = interp->jit_state.prev_state.code_curr_size;
+    int curr_stackentries = _tstate->jit_state.initial_state.stack_depth;
+    int length = _tstate->jit_state.prev_state.code_curr_size;
     // Trace too short, don't bother.
     if (length <= 5) {
         return 0;
     }
@@ -1356,7 +1360,7 @@ uop_optimize(
     assert(length < UOP_MAX_TRACE_LENGTH);
     OPT_STAT_INC(traces_created);
     if (!is_noopt) {
-        length = _Py_uop_analyze_and_optimize(tstate->interp->jit_state.initial_state.func, buffer,
+        length = _Py_uop_analyze_and_optimize(_tstate->jit_state.initial_state.func, buffer,
             length, curr_stackentries, &new_dependencies);
         if (length <= 0) {
@@ -1382,7 +1386,7 @@ uop_optimize(
     length = prepare_for_execution(buffer, length);
     assert(length <= UOP_MAX_TRACE_LENGTH);
     _PyExecutorObject *executor = make_executor_from_uops(
-        buffer, length, &new_dependencies, tstate->interp->jit_state.initial_state.chain_depth);
+        buffer, length, &new_dependencies, _tstate->jit_state.initial_state.chain_depth);
     if (executor == NULL) {
         return -1;
     }
@@ -1724,10 +1728,10 @@ _PyJit_Tracer_InvalidateDependency(PyThreadState *tstate, void *obj)
     _PyBloomFilter obj_filter;
     _Py_BloomFilter_Init(&obj_filter);
     _Py_BloomFilter_Add(&obj_filter, obj);
-
-    if (bloom_filter_may_contain(&tstate->interp->jit_state.prev_state.dependencies, &obj_filter))
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    if (bloom_filter_may_contain(&_tstate->jit_state.prev_state.dependencies, &obj_filter))
     {
-        tstate->interp->jit_state.prev_state.dependencies_still_valid = false;
+        _tstate->jit_state.prev_state.dependencies_still_valid = false;
     }
 }
 
 /* Invalidate all executors */
diff --git a/Python/pystate.c b/Python/pystate.c
index 36d62ecae2ee..a6a54c2f94a7 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -545,9 +545,6 @@ init_interpreter(PyInterpreterState *interp,
     _Py_brc_init_state(interp);
 #endif
 
-#ifdef _Py_TIER2
-    interp->jit_state.code_buffer = NULL;
-#endif
     llist_init(&interp->mem_free_queue.head);
     llist_init(&interp->asyncio_tasks_head);
     interp->asyncio_tasks_lock = (PyMutex){0};
@@ -797,10 +794,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
 
 #ifdef _Py_TIER2
     _Py_ClearExecutorDeletionList(interp);
-    if (interp->jit_state.code_buffer != NULL) {
-        _PyObject_VirtualFree(interp->jit_state.code_buffer, UOP_BUFFER_SIZE);
-        interp->jit_state.code_buffer = NULL;
-    }
 #endif
     _PyAST_Fini(interp);
     _PyAtExit_Fini(interp);
@@ -1495,6 +1488,9 @@ init_threadstate(_PyThreadStateImpl *_tstate,
     _tstate->asyncio_running_loop = NULL;
     _tstate->asyncio_running_task = NULL;
 
+#ifdef _Py_TIER2
+    _tstate->jit_state.code_buffer = NULL;
+#endif
     tstate->delete_later = NULL;
     llist_init(&_tstate->mem_free_queue);
@@ -1794,6 +1790,14 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
     assert(tstate_impl->refcounts.values == NULL);
 #endif
 
+#if _Py_TIER2
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    if (_tstate->jit_state.code_buffer != NULL) {
+        _PyObject_VirtualFree(_tstate->jit_state.code_buffer, UOP_BUFFER_SIZE);
+        _tstate->jit_state.code_buffer = NULL;
+    }
+#endif
+
     HEAD_UNLOCK(runtime);
 
     // XXX Unbind in PyThreadState_Clear(), or earlier
diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py
index 4ede31992a7d..7e4f1bd5c88c 100644
--- a/Tools/cases_generator/generators_common.py
+++ b/Tools/cases_generator/generators_common.py
@@ -162,7 +162,7 @@ class Emitter:
         assert "specializing" in uop.annotations, uop.name
         self.out.start_line()
         self.emit("#if _Py_TIER2\n")
-        self.emit("tstate->interp->jit_state.prev_state.specialize_counter++;\n")
+        self.emit("((_PyThreadStateImpl *)tstate)->jit_state.prev_state.specialize_counter++;\n")
         self.emit("#endif\n")
         self.emit(tkn)
         emit_to(self.out, tkn_iter, "SEMI")