git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-143421: Move `JitOptContext` from stack allocation to per-thread heap allocation...
author Hai Zhu <35182391+cocolato@users.noreply.github.com>
Thu, 8 Jan 2026 19:38:21 +0000 (03:38 +0800)
committer GitHub <noreply@github.com>
Thu, 8 Jan 2026 19:38:21 +0000 (19:38 +0000)
* move JitOptContext to _PyThreadStateImpl
* make _PyUOpInstruction buffer a part of _PyThreadStateImpl

Co-authored-by: Kumar Aditya <kumaraditya@python.org>
Include/internal/pycore_optimizer.h
Include/internal/pycore_optimizer_types.h [new file with mode: 0644]
Include/internal/pycore_tstate.h
Python/optimizer.c
Python/optimizer_analysis.c
Python/pystate.c

index 6a0fc1a59e7965367b744f2b029c61a5b4257588..d1d22c77507c6c9ddc27db66d568ad4c6746edd7 100644 (file)
@@ -12,6 +12,7 @@ extern "C" {
 #include "pycore_uop.h"           // _PyUOpInstruction
 #include "pycore_uop_ids.h"
 #include "pycore_stackref.h"      // _PyStackRef
+#include "pycore_optimizer_types.h"
 #include <stdbool.h>
 
 
@@ -84,7 +85,7 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
 #define JIT_CLEANUP_THRESHOLD 1000
 
 int _Py_uop_analyze_and_optimize(
-    PyFunctionObject *func,
+    _PyThreadStateImpl *tstate,
     _PyUOpInstruction *trace, int trace_len, int curr_stackentries,
     _PyBloomFilter *dependencies);
 
@@ -112,86 +113,6 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
     return inst->error_target;
 }
 
-// Holds locals, stack, locals, stack ... co_consts (in that order)
-#define MAX_ABSTRACT_INTERP_SIZE 4096
-
-#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
-
-// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
-#define MAX_ABSTRACT_FRAME_DEPTH (16)
-
-// The maximum number of side exits that we can take before requiring forward
-// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
-// is the "maximum amount of polymorphism" that an isolated trace tree can
-// handle before rejoining the rest of the program.
-#define MAX_CHAIN_DEPTH 4
-
-/* Symbols */
-/* See explanation in optimizer_symbols.c */
-
-
-typedef enum _JitSymType {
-    JIT_SYM_UNKNOWN_TAG = 1,
-    JIT_SYM_NULL_TAG = 2,
-    JIT_SYM_NON_NULL_TAG = 3,
-    JIT_SYM_BOTTOM_TAG = 4,
-    JIT_SYM_TYPE_VERSION_TAG = 5,
-    JIT_SYM_KNOWN_CLASS_TAG = 6,
-    JIT_SYM_KNOWN_VALUE_TAG = 7,
-    JIT_SYM_TUPLE_TAG = 8,
-    JIT_SYM_TRUTHINESS_TAG = 9,
-    JIT_SYM_COMPACT_INT = 10,
-} JitSymType;
-
-typedef struct _jit_opt_known_class {
-    uint8_t tag;
-    uint32_t version;
-    PyTypeObject *type;
-} JitOptKnownClass;
-
-typedef struct _jit_opt_known_version {
-    uint8_t tag;
-    uint32_t version;
-} JitOptKnownVersion;
-
-typedef struct _jit_opt_known_value {
-    uint8_t tag;
-    PyObject *value;
-} JitOptKnownValue;
-
-#define MAX_SYMBOLIC_TUPLE_SIZE 7
-
-typedef struct _jit_opt_tuple {
-    uint8_t tag;
-    uint8_t length;
-    uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];
-} JitOptTuple;
-
-typedef struct {
-    uint8_t tag;
-    bool invert;
-    uint16_t value;
-} JitOptTruthiness;
-
-typedef struct {
-    uint8_t tag;
-} JitOptCompactInt;
-
-typedef union _jit_opt_symbol {
-    uint8_t tag;
-    JitOptKnownClass cls;
-    JitOptKnownValue value;
-    JitOptKnownVersion version;
-    JitOptTuple tuple;
-    JitOptTruthiness truthiness;
-    JitOptCompactInt compact;
-} JitOptSymbol;
-
-
-// This mimics the _PyStackRef API
-typedef union {
-    uintptr_t bits;
-} JitOptRef;
 
 #define REF_IS_BORROWED 1
 
@@ -238,48 +159,6 @@ PyJitRef_IsBorrowed(JitOptRef ref)
     return (ref.bits & REF_IS_BORROWED) == REF_IS_BORROWED;
 }
 
-struct _Py_UOpsAbstractFrame {
-    bool globals_watched;
-     // The version number of the globals dicts, once checked. 0 if unchecked.
-    uint32_t globals_checked_version;
-    // Max stacklen
-    int stack_len;
-    int locals_len;
-    PyFunctionObject *func;
-    PyCodeObject *code;
-
-    JitOptRef *stack_pointer;
-    JitOptRef *stack;
-    JitOptRef *locals;
-};
-
-typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
-
-typedef struct ty_arena {
-    int ty_curr_number;
-    int ty_max_number;
-    JitOptSymbol arena[TY_ARENA_SIZE];
-} ty_arena;
-
-typedef struct _JitOptContext {
-    char done;
-    char out_of_space;
-    bool contradiction;
-     // Has the builtins dict been watched?
-    bool builtins_watched;
-    // The current "executing" frame.
-    _Py_UOpsAbstractFrame *frame;
-    _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
-    int curr_frame_depth;
-
-    // Arena for the symbolic types.
-    ty_arena t_arena;
-
-    JitOptRef *n_consumed;
-    JitOptRef *limit;
-    JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
-} JitOptContext;
-
 extern bool _Py_uop_sym_is_null(JitOptRef sym);
 extern bool _Py_uop_sym_is_not_null(JitOptRef sym);
 extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef sym);
diff --git a/Include/internal/pycore_optimizer_types.h b/Include/internal/pycore_optimizer_types.h
new file mode 100644 (file)
index 0000000..de8e509
--- /dev/null
@@ -0,0 +1,137 @@
+#ifndef Py_INTERNAL_OPTIMIZER_TYPES_H
+#define Py_INTERNAL_OPTIMIZER_TYPES_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_uop.h"  // UOP_MAX_TRACE_LENGTH
+
+// Holds locals, stack, locals, stack ... co_consts (in that order)
+#define MAX_ABSTRACT_INTERP_SIZE 4096
+
+#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
+
+// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
+#define MAX_ABSTRACT_FRAME_DEPTH (16)
+
+// The maximum number of side exits that we can take before requiring forward
+// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
+// is the "maximum amount of polymorphism" that an isolated trace tree can
+// handle before rejoining the rest of the program.
+#define MAX_CHAIN_DEPTH 4
+
+/* Symbols */
+/* See explanation in optimizer_symbols.c */
+
+
+typedef enum _JitSymType {
+    JIT_SYM_UNKNOWN_TAG = 1,
+    JIT_SYM_NULL_TAG = 2,
+    JIT_SYM_NON_NULL_TAG = 3,
+    JIT_SYM_BOTTOM_TAG = 4,
+    JIT_SYM_TYPE_VERSION_TAG = 5,
+    JIT_SYM_KNOWN_CLASS_TAG = 6,
+    JIT_SYM_KNOWN_VALUE_TAG = 7,
+    JIT_SYM_TUPLE_TAG = 8,
+    JIT_SYM_TRUTHINESS_TAG = 9,
+    JIT_SYM_COMPACT_INT = 10,
+} JitSymType;
+
+typedef struct _jit_opt_known_class {
+    uint8_t tag;
+    uint32_t version;
+    PyTypeObject *type;
+} JitOptKnownClass;
+
+typedef struct _jit_opt_known_version {
+    uint8_t tag;
+    uint32_t version;
+} JitOptKnownVersion;
+
+typedef struct _jit_opt_known_value {
+    uint8_t tag;
+    PyObject *value;
+} JitOptKnownValue;
+
+#define MAX_SYMBOLIC_TUPLE_SIZE 7
+
+typedef struct _jit_opt_tuple {
+    uint8_t tag;
+    uint8_t length;
+    uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];
+} JitOptTuple;
+
+typedef struct {
+    uint8_t tag;
+    bool invert;
+    uint16_t value;
+} JitOptTruthiness;
+
+typedef struct {
+    uint8_t tag;
+} JitOptCompactInt;
+
+typedef union _jit_opt_symbol {
+    uint8_t tag;
+    JitOptKnownClass cls;
+    JitOptKnownValue value;
+    JitOptKnownVersion version;
+    JitOptTuple tuple;
+    JitOptTruthiness truthiness;
+    JitOptCompactInt compact;
+} JitOptSymbol;
+
+// This mimics the _PyStackRef API
+typedef union {
+    uintptr_t bits;
+} JitOptRef;
+
+typedef struct _Py_UOpsAbstractFrame {
+    bool globals_watched;
+    // The version number of the globals dicts, once checked. 0 if unchecked.
+    uint32_t globals_checked_version;
+    // Max stacklen
+    int stack_len;
+    int locals_len;
+    PyFunctionObject *func;
+    PyCodeObject *code;
+
+    JitOptRef *stack_pointer;
+    JitOptRef *stack;
+    JitOptRef *locals;
+} _Py_UOpsAbstractFrame;
+
+typedef struct ty_arena {
+    int ty_curr_number;
+    int ty_max_number;
+    JitOptSymbol arena[TY_ARENA_SIZE];
+} ty_arena;
+
+typedef struct _JitOptContext {
+    char done;
+    char out_of_space;
+    bool contradiction;
+    // Has the builtins dict been watched?
+    bool builtins_watched;
+    // The current "executing" frame.
+    _Py_UOpsAbstractFrame *frame;
+    _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
+    int curr_frame_depth;
+
+    // Arena for the symbolic types.
+    ty_arena t_arena;
+
+    JitOptRef *n_consumed;
+    JitOptRef *limit;
+    JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
+} JitOptContext;
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_OPTIMIZER_TYPES_H */
index d8f4bfef98af7e72594233de947d0a8b6c9f492b..81cabb4dca47e44906460137a31e19a71cdc986e 100644 (file)
@@ -12,6 +12,7 @@ extern "C" {
 #include "pycore_freelist_state.h"  // struct _Py_freelists
 #include "pycore_interpframe_structs.h"  // _PyInterpreterFrame
 #include "pycore_mimalloc.h"        // struct _mimalloc_thread_state
+#include "pycore_optimizer_types.h" // JitOptContext
 #include "pycore_qsbr.h"            // struct qsbr
 #include "pycore_uop.h"             // struct _PyUOpInstruction
 #include "pycore_structs.h"
@@ -52,10 +53,11 @@ typedef struct _PyJitTracerTranslatorState {
 } _PyJitTracerTranslatorState;
 
 typedef struct _PyJitTracerState {
-    _PyUOpInstruction *code_buffer;
     _PyJitTracerInitialState initial_state;
     _PyJitTracerPreviousState prev_state;
     _PyJitTracerTranslatorState translator_state;
+    JitOptContext opt_context;
+    _PyUOpInstruction code_buffer[UOP_MAX_TRACE_LENGTH];
 } _PyJitTracerState;
 
 #endif
index d32fae2e489af48d383f82421b8e818a106fb124..73617f6ca264251e547ea71da4c5eafa7a5d8fc1 100644 (file)
@@ -1025,13 +1025,6 @@ _PyJit_TryInitializeTracing(
     if (oparg > 0xFFFF) {
         return 0;
     }
-    if (_tstate->jit_tracer_state.code_buffer == NULL) {
-        _tstate->jit_tracer_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
-        if (_tstate->jit_tracer_state.code_buffer == NULL) {
-            // Don't error, just go to next instruction.
-            return 0;
-        }
-    }
     PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
     if (func == NULL) {
         return 0;
@@ -1484,8 +1477,8 @@ uop_optimize(
     OPT_STAT_INC(traces_created);
     if (!is_noopt) {
         length = _Py_uop_analyze_and_optimize(
-            _tstate->jit_tracer_state.initial_state.func,
-            buffer,length,
+            _tstate,
+            buffer, length,
             curr_stackentries, dependencies);
         if (length <= 0) {
             return length;
index 29a088e43c2a0f779b3de0aed5cf44c156dda0eb..56d4f9945d6908d7482cde7b63ff6fd581260888 100644 (file)
@@ -18,6 +18,7 @@
 #include "pycore_opcode_metadata.h"
 #include "pycore_opcode_utils.h"
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
+#include "pycore_tstate.h"        // _PyThreadStateImpl
 #include "pycore_uop_metadata.h"
 #include "pycore_long.h"
 #include "pycore_interpframe.h"  // _PyFrame_GetCode
@@ -334,7 +335,7 @@ _Py_opt_assert_within_stack_bounds(
 /* >0 (length) for success, 0 for not ready, clears all possible errors. */
 static int
 optimize_uops(
-    PyFunctionObject *func,
+    _PyThreadStateImpl *tstate,
     _PyUOpInstruction *trace,
     int trace_len,
     int curr_stacklen,
@@ -342,9 +343,9 @@ optimize_uops(
 )
 {
     assert(!PyErr_Occurred());
+    PyFunctionObject *func = tstate->jit_tracer_state.initial_state.func;
 
-    JitOptContext context;
-    JitOptContext *ctx = &context;
+    JitOptContext *ctx = &tstate->jit_tracer_state.opt_context;
     uint32_t opcode = UINT16_MAX;
 
     // Make sure that watchers are set up
@@ -574,7 +575,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
 //  > 0 - length of optimized trace
 int
 _Py_uop_analyze_and_optimize(
-    PyFunctionObject *func,
+    _PyThreadStateImpl *tstate,
     _PyUOpInstruction *buffer,
     int length,
     int curr_stacklen,
@@ -584,7 +585,7 @@ _Py_uop_analyze_and_optimize(
     OPT_STAT_INC(optimizer_attempts);
 
     length = optimize_uops(
-         func, buffer,
+         tstate, buffer,
          length, curr_stacklen, dependencies);
 
     if (length == 0) {
index 23853f697924509068fd0b0aaf21c69ce9692ea6..74507efa5b4cf33fbf8d7dfed72480c2522c1f4d 100644 (file)
@@ -1553,7 +1553,6 @@ init_threadstate(_PyThreadStateImpl *_tstate,
     init_policy(&_tstate->policy.jit.side_exit_initial_backoff,
                 "PYTHON_JIT_SIDE_EXIT_INITIAL_BACKOFF",
                 SIDE_EXIT_INITIAL_BACKOFF, 0, MAX_BACKOFF);
-    _tstate->jit_tracer_state.code_buffer = NULL;
 #endif
     tstate->delete_later = NULL;
 
@@ -1868,14 +1867,6 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
     assert(tstate_impl->refcounts.values == NULL);
 #endif
 
-#if _Py_TIER2
-    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
-    if (_tstate->jit_tracer_state.code_buffer != NULL) {
-        _PyObject_VirtualFree(_tstate->jit_tracer_state.code_buffer, UOP_BUFFER_SIZE);
-        _tstate->jit_tracer_state.code_buffer = NULL;
-    }
-#endif
-
     HEAD_UNLOCK(runtime);
 
     // XXX Unbind in PyThreadState_Clear(), or earlier