]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-106581: Project through calls (#108067)
authorGuido van Rossum <guido@python.org>
Thu, 17 Aug 2023 18:29:58 +0000 (11:29 -0700)
committerGitHub <noreply@github.com>
Thu, 17 Aug 2023 18:29:58 +0000 (11:29 -0700)
This finishes the work begun in gh-107760. When, while projecting a superblock, we encounter a call to a short, simple function, the superblock will now enter the function using `_PUSH_FRAME`, continue through it, and leave it using `_POP_FRAME`, and then continue through the original code. Multiple frame pushes and pops are even possible. It is also possible to stop appending to the superblock in the middle of a called function, when running out of space or encountering an unsupported bytecode.

16 files changed:
Include/internal/pycore_ceval.h
Include/internal/pycore_function.h
Include/internal/pycore_opcode_metadata.h
Lib/test/test_capi/test_misc.py
Lib/test/test_code.py
Objects/codeobject.c
Objects/funcobject.c
Python/abstract_interp_cases.c.h
Python/bytecodes.c
Python/ceval.c
Python/ceval_macros.h
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/optimizer.c
Tools/cases_generator/analysis.py
Tools/cases_generator/stacking.py

index 05b7380597812bdb06399f18e9cb5eaab9d64699..0e3a99be8c36aac273723dc4f975c62571eb4995 100644 (file)
@@ -171,6 +171,7 @@ void _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *
 PyObject *_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs);
 PyObject *_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys);
 int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp);
+void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
 
 
 #ifdef __cplusplus
index e844d323ec7927c1f18d0106d80c3fa5830770d9..3f3da8a44b77e4c1bb031378c8af413ff1ee0d41 100644 (file)
@@ -16,13 +16,22 @@ extern PyObject* _PyFunction_Vectorcall(
 
 #define FUNC_MAX_WATCHERS 8
 
+#define FUNC_VERSION_CACHE_SIZE (1<<12)  /* Must be a power of 2 */
 struct _py_func_state {
     uint32_t next_version;
+    // Borrowed references to function objects whose
+    // func_version % FUNC_VERSION_CACHE_SIZE
+    // once was equal to the index in the table.
+    // They are cleared when the function is deallocated.
+    PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE];
 };
 
 extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);
 
 extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
+extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
+PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version);
+
 extern PyObject *_Py_set_function_type_params(
     PyThreadState* unused, PyObject *func, PyObject *type_params);
 
index afe8aa172b703b66ad7fbaf013886a33dd87ba0b..396d194ed2734e3ca97e9f76c69b7163a90b494e 100644 (file)
 #define _BINARY_OP_SUBTRACT_FLOAT 309
 #define _GUARD_BOTH_UNICODE 310
 #define _BINARY_OP_ADD_UNICODE 311
-#define _LOAD_LOCALS 312
-#define _LOAD_FROM_DICT_OR_GLOBALS 313
-#define _GUARD_GLOBALS_VERSION 314
-#define _GUARD_BUILTINS_VERSION 315
-#define _LOAD_GLOBAL_MODULE 316
-#define _LOAD_GLOBAL_BUILTINS 317
-#define _GUARD_TYPE_VERSION 318
-#define _CHECK_MANAGED_OBJECT_HAS_VALUES 319
-#define _LOAD_ATTR_INSTANCE_VALUE 320
-#define IS_NONE 321
-#define _ITER_CHECK_LIST 322
-#define _IS_ITER_EXHAUSTED_LIST 323
-#define _ITER_NEXT_LIST 324
-#define _ITER_CHECK_TUPLE 325
-#define _IS_ITER_EXHAUSTED_TUPLE 326
-#define _ITER_NEXT_TUPLE 327
-#define _ITER_CHECK_RANGE 328
-#define _IS_ITER_EXHAUSTED_RANGE 329
-#define _ITER_NEXT_RANGE 330
-#define _CHECK_PEP_523 331
-#define _CHECK_FUNCTION_EXACT_ARGS 332
-#define _CHECK_STACK_SPACE 333
-#define _INIT_CALL_PY_EXACT_ARGS 334
-#define _PUSH_FRAME 335
-#define _POP_JUMP_IF_FALSE 336
-#define _POP_JUMP_IF_TRUE 337
-#define JUMP_TO_TOP 338
-#define SAVE_CURRENT_IP 339
-#define INSERT 340
+#define _POP_FRAME 312
+#define _LOAD_LOCALS 313
+#define _LOAD_FROM_DICT_OR_GLOBALS 314
+#define _GUARD_GLOBALS_VERSION 315
+#define _GUARD_BUILTINS_VERSION 316
+#define _LOAD_GLOBAL_MODULE 317
+#define _LOAD_GLOBAL_BUILTINS 318
+#define _GUARD_TYPE_VERSION 319
+#define _CHECK_MANAGED_OBJECT_HAS_VALUES 320
+#define _LOAD_ATTR_INSTANCE_VALUE 321
+#define IS_NONE 322
+#define _ITER_CHECK_LIST 323
+#define _IS_ITER_EXHAUSTED_LIST 324
+#define _ITER_NEXT_LIST 325
+#define _ITER_CHECK_TUPLE 326
+#define _IS_ITER_EXHAUSTED_TUPLE 327
+#define _ITER_NEXT_TUPLE 328
+#define _ITER_CHECK_RANGE 329
+#define _IS_ITER_EXHAUSTED_RANGE 330
+#define _ITER_NEXT_RANGE 331
+#define _CHECK_PEP_523 332
+#define _CHECK_FUNCTION_EXACT_ARGS 333
+#define _CHECK_STACK_SPACE 334
+#define _INIT_CALL_PY_EXACT_ARGS 335
+#define _PUSH_FRAME 336
+#define _POP_JUMP_IF_FALSE 337
+#define _POP_JUMP_IF_TRUE 338
+#define JUMP_TO_TOP 339
+#define SAVE_CURRENT_IP 340
+#define INSERT 341
 
 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
 #ifdef NEED_OPCODE_METADATA
@@ -197,6 +198,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump)  {
             return oparg;
         case INTERPRETER_EXIT:
             return 1;
+        case _POP_FRAME:
+            return 1;
         case RETURN_VALUE:
             return 1;
         case INSTRUMENTED_RETURN_VALUE:
@@ -723,6 +726,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump)  {
             return 0;
         case INTERPRETER_EXIT:
             return 0;
+        case _POP_FRAME:
+            return 0;
         case RETURN_VALUE:
             return 0;
         case INSTRUMENTED_RETURN_VALUE:
@@ -1191,7 +1196,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
     [STORE_FAST_STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG },
     [POP_TOP] = { true, INSTR_FMT_IX, 0 },
     [PUSH_NULL] = { true, INSTR_FMT_IX, 0 },
-    [END_FOR] = { true, INSTR_FMT_IB, 0 },
+    [END_FOR] = { true, INSTR_FMT_IX, 0 },
     [INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, 0 },
     [END_SEND] = { true, INSTR_FMT_IX, 0 },
     [INSTRUMENTED_END_SEND] = { true, INSTR_FMT_IX, 0 },
@@ -1205,14 +1210,14 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
     [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, 0 },
     [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, 0 },
     [UNARY_INVERT] = { true, INSTR_FMT_IX, 0 },
-    [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IBC, 0 },
-    [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IBC, 0 },
-    [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IBC, 0 },
-    [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IBC, 0 },
-    [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IBC, 0 },
-    [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IBC, 0 },
-    [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IBC, 0 },
-    [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IB, HAS_LOCAL_FLAG },
+    [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, 0 },
+    [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, 0 },
+    [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, 0 },
+    [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, 0 },
+    [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, 0 },
+    [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, 0 },
+    [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, 0 },
+    [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IX, HAS_LOCAL_FLAG },
     [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, 0 },
     [BINARY_SLICE] = { true, INSTR_FMT_IX, 0 },
     [STORE_SLICE] = { true, INSTR_FMT_IX, 0 },
@@ -1259,7 +1264,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
     [DELETE_ATTR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
     [STORE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
     [DELETE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
-    [LOAD_LOCALS] = { true, INSTR_FMT_IB, 0 },
+    [LOAD_LOCALS] = { true, INSTR_FMT_IX, 0 },
     [LOAD_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
     [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
     [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG },
@@ -1400,6 +1405,7 @@ extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACR
 #ifdef NEED_OPCODE_METADATA
 const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPANSION_SIZE] = {
     [NOP] = { .nuops = 1, .uops = { { NOP, 0, 0 } } },
+    [RESUME] = { .nuops = 1, .uops = { { RESUME, 0, 0 } } },
     [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { LOAD_FAST_CHECK, 0, 0 } } },
     [LOAD_FAST] = { .nuops = 1, .uops = { { LOAD_FAST, 0, 0 } } },
     [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { LOAD_FAST_AND_CLEAR, 0, 0 } } },
@@ -1444,6 +1450,8 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
     [DELETE_SUBSCR] = { .nuops = 1, .uops = { { DELETE_SUBSCR, 0, 0 } } },
     [CALL_INTRINSIC_1] = { .nuops = 1, .uops = { { CALL_INTRINSIC_1, 0, 0 } } },
     [CALL_INTRINSIC_2] = { .nuops = 1, .uops = { { CALL_INTRINSIC_2, 0, 0 } } },
+    [RETURN_VALUE] = { .nuops = 3, .uops = { { SAVE_IP, 7, 0 }, { SAVE_CURRENT_IP, 0, 0 }, { _POP_FRAME, 0, 0 } } },
+    [RETURN_CONST] = { .nuops = 4, .uops = { { LOAD_CONST, 0, 0 }, { SAVE_IP, 7, 0 }, { SAVE_CURRENT_IP, 0, 0 }, { _POP_FRAME, 0, 0 } } },
     [GET_AITER] = { .nuops = 1, .uops = { { GET_AITER, 0, 0 } } },
     [GET_ANEXT] = { .nuops = 1, .uops = { { GET_ANEXT, 0, 0 } } },
     [GET_AWAITABLE] = { .nuops = 1, .uops = { { GET_AWAITABLE, 0, 0 } } },
@@ -1545,6 +1553,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = {
     [_BINARY_OP_SUBTRACT_FLOAT] = "_BINARY_OP_SUBTRACT_FLOAT",
     [_GUARD_BOTH_UNICODE] = "_GUARD_BOTH_UNICODE",
     [_BINARY_OP_ADD_UNICODE] = "_BINARY_OP_ADD_UNICODE",
+    [_POP_FRAME] = "_POP_FRAME",
     [_LOAD_LOCALS] = "_LOAD_LOCALS",
     [_LOAD_FROM_DICT_OR_GLOBALS] = "_LOAD_FROM_DICT_OR_GLOBALS",
     [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION",
index 3dfbfdc26e7416518eb88512fe3bc7ebd441ef36..18a0476122dabfae6cca81da300c9513f2ea3632 100644 (file)
@@ -2633,6 +2633,7 @@ class TestUops(unittest.TestCase):
         self.assertIsNotNone(ex)
         uops = {opname for opname, _, _ in ex}
         self.assertIn("_PUSH_FRAME", uops)
+        self.assertIn("_BINARY_OP_ADD_INT", uops)
 
 
 
index ca06a39f5df142f44caa63321e157d9a94318f71..e056c16466e8c47aeb7f89ec1b7c05e1a790dd3d 100644 (file)
@@ -264,7 +264,7 @@ class CodeTest(unittest.TestCase):
             ("co_posonlyargcount", 0),
             ("co_kwonlyargcount", 0),
             ("co_nlocals", 1),
-            ("co_stacksize", 0),
+            ("co_stacksize", 1),
             ("co_flags", code.co_flags | inspect.CO_COROUTINE),
             ("co_firstlineno", 100),
             ("co_code", code2.co_code),
index 2c9c8cec77ff9f320d48b6005eaa388b1b09c889..4d6efe938f45d6c43fd9c908d868d3bf416c3726 100644 (file)
@@ -396,6 +396,9 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
     int nlocals, ncellvars, nfreevars;
     get_localsplus_counts(con->localsplusnames, con->localspluskinds,
                           &nlocals, &ncellvars, &nfreevars);
+    if (con->stacksize == 0) {
+        con->stacksize = 1;
+    }
 
     co->co_filename = Py_NewRef(con->filename);
     co->co_name = Py_NewRef(con->name);
index 8c0bface3ac710bceeb36e4f9fd0d82d40affed2..33191d23f18230b84eeba8c8f8e0bdbd923fbd59 100644 (file)
@@ -223,7 +223,73 @@ error:
     return NULL;
 }
 
-uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
+/*
+Function versions
+-----------------
+
+Function versions are used to detect when a function object has been
+updated, invalidating inline cache data used by the `CALL` bytecode
+(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).
+
+They are also used by the Tier 2 superblock creation code to find
+the function being called (and from there the code object).
+
+How does a function's `func_version` field get initialized?
+
+- `PyFunction_New` and friends initialize it to 0.
+- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
+- It is reset to 0 when various attributes like `__code__` are set.
+- A new version is allocated by `_PyFunction_GetVersionForCurrentState`
+  when the specializer needs a version and the version is 0.
+
+The latter allocates versions using a counter in the interpreter state;
+when the counter wraps around to 0, no more versions are allocated.
+There is one other special case: functions with a non-standard
+`vectorcall` field are not given a version.
+
+When the function version is 0, the `CALL` bytecode is not specialized.
+
+Code object versions
+--------------------
+
+So where do code objects get their `co_version`? There is a single
+static global counter, `_Py_next_func_version`. This is initialized in
+the generated (!) file `Python/deepfreeze/deepfreeze.c`, to 1 plus the
+number of deep-frozen function objects in that file.
+(In `_bootstrap_python.c` and `freeze_module.c` it is initialized to 1.)
+
+Code objects get a new `co_version` allocated from this counter upon
+creation. Since code objects are nominally immutable, `co_version`
+cannot be invalidated. The only way it can be 0 is when 2**32 or more
+code objects have been created during the process's lifetime.
+(The counter isn't reset by `fork()`, extending the lifetime.)
+*/
+
+void
+_PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
+{
+    func->func_version = version;
+    if (version != 0) {
+        PyInterpreterState *interp = _PyInterpreterState_GET();
+        interp->func_state.func_version_cache[
+            version % FUNC_VERSION_CACHE_SIZE] = func;
+    }
+}
+
+PyFunctionObject *
+_PyFunction_LookupByVersion(uint32_t version)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    PyFunctionObject *func = interp->func_state.func_version_cache[
+        version % FUNC_VERSION_CACHE_SIZE];
+    if (func != NULL && func->func_version == version) {
+        return (PyFunctionObject *)Py_NewRef(func);
+    }
+    return NULL;
+}
+
+uint32_t
+_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
 {
     if (func->func_version != 0) {
         return func->func_version;
@@ -236,7 +302,7 @@ uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
         return 0;
     }
     uint32_t v = interp->func_state.next_version++;
-    func->func_version = v;
+    _PyFunction_SetVersion(func, v);
     return v;
 }
 
@@ -851,6 +917,15 @@ func_dealloc(PyFunctionObject *op)
     if (op->func_weakreflist != NULL) {
         PyObject_ClearWeakRefs((PyObject *) op);
     }
+    if (op->func_version != 0) {
+        PyInterpreterState *interp = _PyInterpreterState_GET();
+        PyFunctionObject **slot =
+            interp->func_state.func_version_cache
+            + (op->func_version % FUNC_VERSION_CACHE_SIZE);
+        if (*slot == op) {
+            *slot = NULL;
+        }
+    }
     (void)func_clear(op);
     // These aren't cleared by func_clear().
     Py_DECREF(op->func_code);
index eef071119bcd8481ac1d21205dba0ae22cc891f1..1b99b929fa8014873b5b6b2a1c998fb14a7fee81 100644 (file)
@@ -7,6 +7,10 @@
             break;
         }
 
+        case RESUME: {
+            break;
+        }
+
         case POP_TOP: {
             STACK_SHRINK(1);
             break;
             break;
         }
 
+        case _POP_FRAME: {
+            STACK_SHRINK(1);
+            break;
+        }
+
         case GET_AITER: {
             PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
             break;
index 6f17472e04e5e3fe77dc35d6510b917a7ec88408..ae459cabaddc058b35989364ad12a1e5abd15a55 100644 (file)
@@ -133,6 +133,7 @@ dummy_func(
         }
 
         inst(RESUME, (--)) {
+            #if TIER_ONE
             assert(frame == tstate->current_frame);
             /* Possibly combine this with eval breaker */
             if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
@@ -140,7 +141,9 @@ dummy_func(
                 ERROR_IF(err, error);
                 next_instr--;
             }
-            else if (oparg < 2) {
+            else
+            #endif
+            if (oparg < 2) {
                 CHECK_EVAL_BREAKER();
             }
         }
@@ -757,21 +760,37 @@ dummy_func(
             return retval;
         }
 
-        inst(RETURN_VALUE, (retval --)) {
-            STACK_SHRINK(1);
+        // The stack effect here is ambiguous.
+        // We definitely pop the return value off the stack on entry.
+        // We also push it onto the stack on exit, but that's a
+        // different frame, and it's accounted for by _PUSH_FRAME.
+        op(_POP_FRAME, (retval --)) {
             assert(EMPTY());
             _PyFrame_SetStackPointer(frame, stack_pointer);
             _Py_LeaveRecursiveCallPy(tstate);
-            assert(frame != &entry_frame);
             // GH-99729: We need to unlink the frame *before* clearing it:
             _PyInterpreterFrame *dying = frame;
+            #if TIER_ONE
+            assert(frame != &entry_frame);
+            #endif
             frame = tstate->current_frame = dying->previous;
-            _PyEvalFrameClearAndPop(tstate, dying);
+            _PyEval_FrameClearAndPop(tstate, dying);
             frame->prev_instr += frame->return_offset;
             _PyFrame_StackPush(frame, retval);
+            #if TIER_ONE
             goto resume_frame;
+            #endif
+            #if TIER_TWO
+            stack_pointer = _PyFrame_GetStackPointer(frame);
+            ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
+            #endif
         }
 
+        macro(RETURN_VALUE) =
+            SAVE_IP +  // Tier 2 only; special-cased oparg
+            SAVE_CURRENT_IP +  // Sets frame->prev_instr
+            _POP_FRAME;
+
         inst(INSTRUMENTED_RETURN_VALUE, (retval --)) {
             int err = _Py_call_instrumentation_arg(
                     tstate, PY_MONITORING_EVENT_PY_RETURN,
@@ -785,27 +804,17 @@ dummy_func(
             // GH-99729: We need to unlink the frame *before* clearing it:
             _PyInterpreterFrame *dying = frame;
             frame = tstate->current_frame = dying->previous;
-            _PyEvalFrameClearAndPop(tstate, dying);
+            _PyEval_FrameClearAndPop(tstate, dying);
             frame->prev_instr += frame->return_offset;
             _PyFrame_StackPush(frame, retval);
             goto resume_frame;
         }
 
-        inst(RETURN_CONST, (--)) {
-            PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg);
-            Py_INCREF(retval);
-            assert(EMPTY());
-            _PyFrame_SetStackPointer(frame, stack_pointer);
-            _Py_LeaveRecursiveCallPy(tstate);
-            assert(frame != &entry_frame);
-            // GH-99729: We need to unlink the frame *before* clearing it:
-            _PyInterpreterFrame *dying = frame;
-            frame = tstate->current_frame = dying->previous;
-            _PyEvalFrameClearAndPop(tstate, dying);
-            frame->prev_instr += frame->return_offset;
-            _PyFrame_StackPush(frame, retval);
-            goto resume_frame;
-        }
+        macro(RETURN_CONST) =
+            LOAD_CONST +
+            SAVE_IP +  // Tier 2 only; special-cased oparg
+            SAVE_CURRENT_IP +  // Sets frame->prev_instr
+            _POP_FRAME;
 
         inst(INSTRUMENTED_RETURN_CONST, (--)) {
             PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg);
@@ -821,7 +830,7 @@ dummy_func(
             // GH-99729: We need to unlink the frame *before* clearing it:
             _PyInterpreterFrame *dying = frame;
             frame = tstate->current_frame = dying->previous;
-            _PyEvalFrameClearAndPop(tstate, dying);
+            _PyEval_FrameClearAndPop(tstate, dying);
             frame->prev_instr += frame->return_offset;
             _PyFrame_StackPush(frame, retval);
             goto resume_frame;
@@ -3545,7 +3554,8 @@ dummy_func(
                 goto error;
             }
 
-            func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
+            _PyFunction_SetVersion(
+                func_obj, ((PyCodeObject *)codeobj)->co_version);
             func = (PyObject *)func_obj;
         }
 
index 1e2262c1f18c3ed0cbf34f48f5a265d4955fe24b..329a1a17cf09d43ef43ef13ab3dd95569f866c29 100644 (file)
@@ -222,8 +222,6 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
 static  _PyInterpreterFrame *
 _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
     PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs);
-static void
-_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
 
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
@@ -603,10 +601,6 @@ int _Py_CheckRecursiveCallPy(
 }
 
 
-static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate)  {
-    tstate->py_recursion_remaining++;
-}
-
 static const _Py_CODEUNIT _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS[] = {
     /* Put a NOP at the start, so that the IP points into
     * the code, rather than before it */
@@ -731,7 +725,7 @@ resume_frame:
                 // When tracing executed uops, also trace bytecode
                 char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
                 if (uop_debug != NULL && *uop_debug >= '0') {
-                    lltrace = (*uop_debug - '0') >= 4;  // TODO: Parse an int and all that
+                    lltrace = (*uop_debug - '0') >= 5;  // TODO: Parse an int and all that
                 }
             }
         }
@@ -918,7 +912,7 @@ exit_unwind:
     // GH-99729: We need to unlink the frame *before* clearing it:
     _PyInterpreterFrame *dying = frame;
     frame = tstate->current_frame = dying->previous;
-    _PyEvalFrameClearAndPop(tstate, dying);
+    _PyEval_FrameClearAndPop(tstate, dying);
     frame->return_offset = 0;
     if (frame == &entry_frame) {
         /* Restore previous frame and exit */
@@ -1487,8 +1481,8 @@ clear_gen_frame(PyThreadState *tstate, _PyInterpreterFrame * frame)
     frame->previous = NULL;
 }
 
-static void
-_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame)
+void
+_PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame)
 {
     if (frame->owner == FRAME_OWNED_BY_THREAD) {
         clear_thread_frame(tstate, frame);
index 08f19cd9a397f11028b6ab8d3b0a2f1fa2539f9a..635b8e501e523e1802da880ca0f93126efd3f143 100644 (file)
@@ -369,3 +369,7 @@ static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
     return (tstate->py_recursion_remaining-- <= 0) &&
         _Py_CheckRecursiveCallPy(tstate);
 }
+
+static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate)  {
+    tstate->py_recursion_remaining++;
+}
index 9fbf026f164a60dcf46fcfff79b12e658ec85dca..89a5bbfecded0e5d688ae59de6737b73c4130b7f 100644 (file)
@@ -7,6 +7,23 @@
             break;
         }
 
+        case RESUME: {
+            #if TIER_ONE
+            assert(frame == tstate->current_frame);
+            /* Possibly combine this with eval breaker */
+            if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
+                int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
+                if (err) goto error;
+                next_instr--;
+            }
+            else
+            #endif
+            if (oparg < 2) {
+                CHECK_EVAL_BREAKER();
+            }
+            break;
+        }
+
         case LOAD_FAST_CHECK: {
             PyObject *value;
             value = GETLOCAL(oparg);
             break;
         }
 
+        case _POP_FRAME: {
+            PyObject *retval;
+            retval = stack_pointer[-1];
+            STACK_SHRINK(1);
+            assert(EMPTY());
+            _PyFrame_SetStackPointer(frame, stack_pointer);
+            _Py_LeaveRecursiveCallPy(tstate);
+            // GH-99729: We need to unlink the frame *before* clearing it:
+            _PyInterpreterFrame *dying = frame;
+            #if TIER_ONE
+            assert(frame != &entry_frame);
+            #endif
+            frame = tstate->current_frame = dying->previous;
+            _PyEval_FrameClearAndPop(tstate, dying);
+            frame->prev_instr += frame->return_offset;
+            _PyFrame_StackPush(frame, retval);
+            #if TIER_ONE
+            goto resume_frame;
+            #endif
+            #if TIER_TWO
+            stack_pointer = _PyFrame_GetStackPointer(frame);
+            ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
+            #endif
+            break;
+        }
+
         case GET_AITER: {
             PyObject *obj;
             PyObject *iter;
                 goto error;
             }
 
-            func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
+            _PyFunction_SetVersion(
+                func_obj, ((PyCodeObject *)codeobj)->co_version);
             func = (PyObject *)func_obj;
             stack_pointer[-1] = func;
             break;
index 80af8a7bcd56dff01ea32453cc375dbb59fc0d87..f6322df566c6500d1cdd2aabcadfff59002aeb68 100644 (file)
@@ -8,6 +8,7 @@
         }
 
         TARGET(RESUME) {
+            #if TIER_ONE
             assert(frame == tstate->current_frame);
             /* Possibly combine this with eval breaker */
             if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
@@ -15,7 +16,9 @@
                 if (err) goto error;
                 next_instr--;
             }
-            else if (oparg < 2) {
+            else
+            #endif
+            if (oparg < 2) {
                 CHECK_EVAL_BREAKER();
             }
             DISPATCH();
 
         TARGET(RETURN_VALUE) {
             PyObject *retval;
+            // SAVE_CURRENT_IP
+            {
+                #if TIER_ONE
+                frame->prev_instr = next_instr - 1;
+                #endif
+                #if TIER_TWO
+                // Relies on a preceding SAVE_IP
+                frame->prev_instr--;
+                #endif
+            }
+            // _POP_FRAME
             retval = stack_pointer[-1];
             STACK_SHRINK(1);
-            assert(EMPTY());
-            _PyFrame_SetStackPointer(frame, stack_pointer);
-            _Py_LeaveRecursiveCallPy(tstate);
-            assert(frame != &entry_frame);
-            // GH-99729: We need to unlink the frame *before* clearing it:
-            _PyInterpreterFrame *dying = frame;
-            frame = tstate->current_frame = dying->previous;
-            _PyEvalFrameClearAndPop(tstate, dying);
-            frame->prev_instr += frame->return_offset;
-            _PyFrame_StackPush(frame, retval);
-            goto resume_frame;
-            STACK_SHRINK(1);
+            {
+                assert(EMPTY());
+                _PyFrame_SetStackPointer(frame, stack_pointer);
+                _Py_LeaveRecursiveCallPy(tstate);
+                // GH-99729: We need to unlink the frame *before* clearing it:
+                _PyInterpreterFrame *dying = frame;
+                #if TIER_ONE
+                assert(frame != &entry_frame);
+                #endif
+                frame = tstate->current_frame = dying->previous;
+                _PyEval_FrameClearAndPop(tstate, dying);
+                frame->prev_instr += frame->return_offset;
+                _PyFrame_StackPush(frame, retval);
+                #if TIER_ONE
+                goto resume_frame;
+                #endif
+                #if TIER_TWO
+                stack_pointer = _PyFrame_GetStackPointer(frame);
+                ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
+                #endif
+            }
         }
 
         TARGET(INSTRUMENTED_RETURN_VALUE) {
             // GH-99729: We need to unlink the frame *before* clearing it:
             _PyInterpreterFrame *dying = frame;
             frame = tstate->current_frame = dying->previous;
-            _PyEvalFrameClearAndPop(tstate, dying);
+            _PyEval_FrameClearAndPop(tstate, dying);
             frame->prev_instr += frame->return_offset;
             _PyFrame_StackPush(frame, retval);
             goto resume_frame;
         }
 
         TARGET(RETURN_CONST) {
-            PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg);
-            Py_INCREF(retval);
-            assert(EMPTY());
-            _PyFrame_SetStackPointer(frame, stack_pointer);
-            _Py_LeaveRecursiveCallPy(tstate);
-            assert(frame != &entry_frame);
-            // GH-99729: We need to unlink the frame *before* clearing it:
-            _PyInterpreterFrame *dying = frame;
-            frame = tstate->current_frame = dying->previous;
-            _PyEvalFrameClearAndPop(tstate, dying);
-            frame->prev_instr += frame->return_offset;
-            _PyFrame_StackPush(frame, retval);
-            goto resume_frame;
+            PyObject *value;
+            PyObject *retval;
+            // LOAD_CONST
+            {
+                value = GETITEM(FRAME_CO_CONSTS, oparg);
+                Py_INCREF(value);
+            }
+            // SAVE_CURRENT_IP
+            {
+                #if TIER_ONE
+                frame->prev_instr = next_instr - 1;
+                #endif
+                #if TIER_TWO
+                // Relies on a preceding SAVE_IP
+                frame->prev_instr--;
+                #endif
+            }
+            // _POP_FRAME
+            retval = value;
+            {
+                assert(EMPTY());
+                _PyFrame_SetStackPointer(frame, stack_pointer);
+                _Py_LeaveRecursiveCallPy(tstate);
+                // GH-99729: We need to unlink the frame *before* clearing it:
+                _PyInterpreterFrame *dying = frame;
+                #if TIER_ONE
+                assert(frame != &entry_frame);
+                #endif
+                frame = tstate->current_frame = dying->previous;
+                _PyEval_FrameClearAndPop(tstate, dying);
+                frame->prev_instr += frame->return_offset;
+                _PyFrame_StackPush(frame, retval);
+                #if TIER_ONE
+                goto resume_frame;
+                #endif
+                #if TIER_TWO
+                stack_pointer = _PyFrame_GetStackPointer(frame);
+                ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
+                #endif
+            }
         }
 
         TARGET(INSTRUMENTED_RETURN_CONST) {
             // GH-99729: We need to unlink the frame *before* clearing it:
             _PyInterpreterFrame *dying = frame;
             frame = tstate->current_frame = dying->previous;
-            _PyEvalFrameClearAndPop(tstate, dying);
+            _PyEval_FrameClearAndPop(tstate, dying);
             frame->prev_instr += frame->return_offset;
             _PyFrame_StackPush(frame, retval);
             goto resume_frame;
                 goto error;
             }
 
-            func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
+            _PyFunction_SetVersion(
+                func_obj, ((PyCodeObject *)codeobj)->co_version);
             func = (PyObject *)func_obj;
             stack_pointer[-1] = func;
             DISPATCH();
index 559c4ae987263e678f7bb4bcc3aea86787ab90bf..57518404c3f19d7bbf28c37b4ce3f6ab3cdc034f 100644 (file)
@@ -373,6 +373,8 @@ static PyTypeObject UOpExecutor_Type = {
     .tp_as_sequence = &uop_as_sequence,
 };
 
+#define TRACE_STACK_SIZE 5
+
 static int
 translate_bytecode_to_trace(
     PyCodeObject *code,
@@ -380,10 +382,16 @@ translate_bytecode_to_trace(
     _PyUOpInstruction *trace,
     int buffer_size)
 {
+    PyCodeObject *initial_code = code;
     _Py_CODEUNIT *initial_instr = instr;
     int trace_length = 0;
     int max_length = buffer_size;
     int reserved = 0;
+    struct {
+        PyCodeObject *code;
+        _Py_CODEUNIT *instr;
+    } trace_stack[TRACE_STACK_SIZE];
+    int trace_stack_depth = 0;
 
 #ifdef Py_DEBUG
     char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
@@ -441,6 +449,24 @@ translate_bytecode_to_trace(
 // Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE
 #define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode))
 
+// Trace stack operations (used by _PUSH_FRAME, _POP_FRAME).
+// The trace stack remembers, for each call the projection has entered,
+// the caller's code object and the instruction at which to resume.
+// Both macros are statement-like and use the do { ... } while (0) idiom
+// (PEP 7), so they are safe as the sole body of an unbraced if/else/loop;
+// write the trailing semicolon at the use site.
+
+// Save the current (code, instr) pair. When the stack is already full,
+// emit SAVE_IP and jump to `done` without pushing.
+#define TRACE_STACK_PUSH() \
+    do { \
+        if (trace_stack_depth >= TRACE_STACK_SIZE) { \
+            DPRINTF(2, "Trace stack overflow\n"); \
+            ADD_TO_TRACE(SAVE_IP, 0, 0); \
+            goto done; \
+        } \
+        trace_stack[trace_stack_depth].code = code; \
+        trace_stack[trace_stack_depth].instr = instr; \
+        trace_stack_depth++; \
+    } while (0)
+
+// Restore `code` and `instr` from the most recent push. Popping an empty
+// stack is a fatal error, so callers must check trace_stack_depth first.
+#define TRACE_STACK_POP() \
+    do { \
+        if (trace_stack_depth <= 0) { \
+            Py_FatalError("Trace stack underflow\n"); \
+        } \
+        trace_stack_depth--; \
+        code = trace_stack[trace_stack_depth].code; \
+        instr = trace_stack[trace_stack_depth].instr; \
+    } while (0)
+
     DPRINTF(4,
             "Optimizing %s (%s:%d) at byte offset %d\n",
             PyUnicode_AsUTF8(code->co_qualname),
@@ -448,6 +474,7 @@ translate_bytecode_to_trace(
             code->co_firstlineno,
             2 * INSTR_IP(initial_instr, code));
 
+top:  // Jump here after _PUSH_FRAME
     for (;;) {
         RESERVE_RAW(2, "epilogue");  // Always need space for SAVE_IP and EXIT_TRACE
         ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0);
@@ -508,7 +535,7 @@ pop_jump_if_bool:
 
             case JUMP_BACKWARD:
             {
-                if (instr + 2 - oparg == initial_instr) {
+                if (instr + 2 - oparg == initial_instr && code == initial_code) {
                     RESERVE(1, 0);
                     ADD_TO_TRACE(JUMP_TO_TOP, 0, 0);
                 }
@@ -573,6 +600,14 @@ pop_jump_if_bool:
                     // Reserve space for nuops (+ SAVE_IP + EXIT_TRACE)
                     int nuops = expansion->nuops;
                     RESERVE(nuops, 0);
+                    if (expansion->uops[nuops-1].uop == _POP_FRAME) {
+                        // Check for trace stack underflow now:
+                        // We can't bail e.g. in the middle of
+                        // LOAD_CONST + _POP_FRAME.
+                        // (TRACE_STACK_POP() treats an empty stack as a
+                        // fatal error, so stop before emitting any uop.)
+                        if (trace_stack_depth == 0) {
+                            DPRINTF(2, "Trace stack underflow\n");
+                            goto done;
+                        }
+                    }
                     uint32_t orig_oparg = oparg;  // For OPARG_TOP/BOTTOM
                     for (int i = 0; i < nuops; i++) {
                         oparg = orig_oparg;
@@ -619,8 +654,57 @@ pop_jump_if_bool:
                                 Py_FatalError("garbled expansion");
                         }
                         ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand);
+                        if (expansion->uops[i].uop == _POP_FRAME) {
+                            TRACE_STACK_POP();
+                            DPRINTF(2,
+                                "Returning to %s (%s:%d) at byte offset %d\n",
+                                PyUnicode_AsUTF8(code->co_qualname),
+                                PyUnicode_AsUTF8(code->co_filename),
+                                code->co_firstlineno,
+                                2 * INSTR_IP(instr, code));
+                            goto top;
+                        }
                         if (expansion->uops[i].uop == _PUSH_FRAME) {
                             assert(i + 1 == nuops);
+                            int func_version_offset =
+                                offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT)
+                                // Add one to account for the actual opcode/oparg pair:
+                                + 1;
+                            uint32_t func_version = read_u32(&instr[func_version_offset].cache);
+                            PyFunctionObject *func = _PyFunction_LookupByVersion(func_version);
+                            DPRINTF(3, "Function object: %p\n", func);
+                            if (func != NULL) {
+                                PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(func);
+                                if (new_code == code) {
+                                    // Recursive call, bail (we could be here forever).
+                                    DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
+                                            PyUnicode_AsUTF8(new_code->co_qualname),
+                                            PyUnicode_AsUTF8(new_code->co_filename),
+                                            new_code->co_firstlineno);
+                                    ADD_TO_TRACE(SAVE_IP, 0, 0);
+                                    goto done;
+                                }
+                                if (new_code->co_version != func_version) {
+                                    // func.__code__ was updated.
+                                    // Perhaps it may happen again, so don't bother tracing.
+                                    // TODO: Reason about this -- is it better to bail or not?
+                                    DPRINTF(2, "Bailing because co_version != func_version\n");
+                                    ADD_TO_TRACE(SAVE_IP, 0, 0);
+                                    goto done;
+                                }
+                                // Increment IP to the return address
+                                instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
+                                TRACE_STACK_PUSH();
+                                code = new_code;
+                                instr = _PyCode_CODE(code);
+                                DPRINTF(2,
+                                    "Continuing in %s (%s:%d) at byte offset %d\n",
+                                    PyUnicode_AsUTF8(code->co_qualname),
+                                    PyUnicode_AsUTF8(code->co_filename),
+                                    code->co_firstlineno,
+                                    2 * INSTR_IP(instr, code));
+                                goto top;
+                            }
                             ADD_TO_TRACE(SAVE_IP, 0, 0);
                             goto done;
                         }
@@ -639,6 +723,10 @@ pop_jump_if_bool:
     }  // End for (;;)
 
 done:
+    while (trace_stack_depth > 0) {
+        TRACE_STACK_POP();
+    }
+    assert(code == initial_code);
     // Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE
     if (trace_length > 3) {
         ADD_TO_TRACE(EXIT_TRACE, 0, 0);
index 2db1cd01c19ae5df233899c67a1563f040ef01d1..48f2db981c95b6e75e3565b778d10d3d143c65f6 100644 (file)
@@ -364,10 +364,12 @@ class Analyzer:
                 case Instruction() as instr:
                     part, offset = self.analyze_instruction(instr, offset)
                     parts.append(part)
-                    flags.add(instr.instr_flags)
+                    if instr.name != "SAVE_IP":
+                        # SAVE_IP in a macro is a no-op in Tier 1
+                        flags.add(instr.instr_flags)
                 case _:
                     typing.assert_never(component)
-        format = "IB"
+        format = "IB" if flags.HAS_ARG_FLAG else "IX"
         if offset:
             format += "C" + "0" * (offset - 1)
         return MacroInstruction(macro.name, format, flags, macro, parts, offset)
index 8361eb99f88a7cc1bcb050b25f71379968daa61c..632298a567dd400eba09d5e25303577ba69bb8b6 100644 (file)
@@ -380,7 +380,7 @@ def write_components(
                     poke.as_stack_effect(lax=True),
                 )
 
-        if mgr.instr.name == "_PUSH_FRAME":
+        if mgr.instr.name in ("_PUSH_FRAME", "_POP_FRAME"):
             # Adjust stack to min_offset (input effects materialized)
             out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high)
             # Use clone() since adjust_inverse() mutates final_offset