git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-148378: Allow multiple consecutive recording ops per macro op (GH-148496)
author Hai Zhu <haiizhu@outlook.com>
Tue, 14 Apr 2026 11:26:53 +0000 (19:26 +0800)
committer GitHub <noreply@github.com>
Tue, 14 Apr 2026 11:26:53 +0000 (19:26 +0800)
Include/internal/pycore_optimizer.h
Lib/test/test_generated_cases.py
Modules/_testinternalcapi/test_cases.c.h
Python/bytecodes.c
Python/generated_cases.c.h
Python/optimizer.c
Python/record_functions.c.h
Tools/cases_generator/analyzer.py
Tools/cases_generator/record_function_generator.py

index e7b688333d9ced13644e87107aafdc7760b49a9c..78bbdc2026e9d2d71293ce282672683b17bc1136 100644 (file)
@@ -91,13 +91,15 @@ typedef struct _PyJitTracerInitialState {
     _Py_CODEUNIT *jump_backward_instr;
 } _PyJitTracerInitialState;
 
+#define MAX_RECORDED_VALUES 3
 typedef struct _PyJitTracerPreviousState {
     int instr_oparg;
     int instr_stacklevel;
     _Py_CODEUNIT *instr;
     PyCodeObject *instr_code; // Strong
     struct _PyInterpreterFrame *instr_frame;
-    PyObject *recorded_value; // Strong, may be NULL
+    PyObject *recorded_values[MAX_RECORDED_VALUES]; // Strong, may be NULL
+    int recorded_count;
 } _PyJitTracerPreviousState;
 
 typedef struct _PyJitTracerTranslatorState {
@@ -481,7 +483,12 @@ void _PyJit_TracerFree(_PyThreadStateImpl *_tstate);
 #ifdef _Py_TIER2
 typedef void (*_Py_RecordFuncPtr)(_PyInterpreterFrame *frame, _PyStackRef *stackpointer, int oparg, PyObject **recorded_value);
 PyAPI_DATA(const _Py_RecordFuncPtr) _PyOpcode_RecordFunctions[];
-PyAPI_DATA(const uint8_t) _PyOpcode_RecordFunctionIndices[256];
+
+typedef struct {
+    uint8_t count;
+    uint8_t indices[MAX_RECORDED_VALUES];
+} _PyOpcodeRecordEntry;
+PyAPI_DATA(const _PyOpcodeRecordEntry) _PyOpcode_RecordEntries[256];
 #endif
 
 #ifdef __cplusplus
index 33fae682a3ceec7eb82b57da80931d8e1717856a..62cf0c0c6af0b2244bbc6083beecf23ea764fb7e 100644 (file)
@@ -29,12 +29,13 @@ skip_if_different_mount_drives()
 
 test_tools.skip_if_missing("cases_generator")
 with test_tools.imports_under_tool("cases_generator"):
-    from analyzer import StackItem
+    from analyzer import StackItem, analyze_files
     from cwriter import CWriter
     import parser
     from stack import Local, Stack
     import tier1_generator
     import optimizer_generator
+    import record_function_generator
 
 
 def handle_stderr():
@@ -1948,6 +1949,202 @@ class TestGeneratedCases(unittest.TestCase):
         with self.assertRaisesRegex(SyntaxError, "Recording uop"):
             self.run_cases_test(input, "")
 
+    def test_multiple_consecutive_recording_uops(self):
+        """Multiple consecutive recording uops at the start of a macro are legal."""
+        input = """
+        tier2 op(_RECORD_A, (a, b -- a, b)) {
+            RECORD_VALUE(a);
+        }
+        tier2 op(_RECORD_B, (a, b -- a, b)) {
+            RECORD_VALUE(b);
+        }
+        op(_DO_STUFF, (a, b -- res)) {
+            res = a;
+            INPUTS_DEAD();
+        }
+        macro(OP) = _RECORD_A + _RECORD_B + _DO_STUFF;
+        """
+        output = """
+        TARGET(OP) {
+            #if _Py_TAIL_CALL_INTERP
+            int opcode = OP;
+            (void)(opcode);
+            #endif
+            frame->instr_ptr = next_instr;
+            next_instr += 1;
+            INSTRUCTION_STATS(OP);
+            _PyStackRef a;
+            _PyStackRef res;
+            // _DO_STUFF
+            {
+                a = stack_pointer[-2];
+                res = a;
+            }
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
+            DISPATCH();
+        }
+        """
+        self.run_cases_test(input, output)
+
+    def test_multiple_recording_uops_after_specializing(self):
+        """Multiple recording uops after a specializing uop are legal."""
+        input = """
+        specializing op(_SPECIALIZE_OP, (counter/1, a, b -- a, b)) {
+            SPAM();
+        }
+        tier2 op(_RECORD_A, (a, b -- a, b)) {
+            RECORD_VALUE(a);
+        }
+        tier2 op(_RECORD_B, (a, b -- a, b)) {
+            RECORD_VALUE(b);
+        }
+        op(_DO_STUFF, (a, b -- res)) {
+            res = a;
+            INPUTS_DEAD();
+        }
+        macro(OP) = _SPECIALIZE_OP + _RECORD_A + _RECORD_B + unused/2 + _DO_STUFF;
+        """
+        output = """
+        TARGET(OP) {
+            #if _Py_TAIL_CALL_INTERP
+            int opcode = OP;
+            (void)(opcode);
+            #endif
+            _Py_CODEUNIT* const this_instr = next_instr;
+            (void)this_instr;
+            frame->instr_ptr = next_instr;
+            next_instr += 4;
+            INSTRUCTION_STATS(OP);
+            _PyStackRef a;
+            _PyStackRef res;
+            // _SPECIALIZE_OP
+            {
+                uint16_t counter = read_u16(&this_instr[1].cache);
+                (void)counter;
+                SPAM();
+            }
+            /* Skip 2 cache entries */
+            // _DO_STUFF
+            {
+                a = stack_pointer[-2];
+                res = a;
+            }
+            stack_pointer[-2] = res;
+            stack_pointer += -1;
+            ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
+            DISPATCH();
+        }
+        """
+        self.run_cases_test(input, output)
+
+    def test_recording_uop_between_real_uops_rejected(self):
+        """A recording uop sandwiched between real uops is rejected."""
+        input = """
+        tier2 op(_RECORD_A, (a, b -- a, b)) {
+            RECORD_VALUE(a);
+        }
+        op(_FIRST, (a, b -- a, b)) {
+            first(a);
+        }
+        tier2 op(_RECORD_B, (a, b -- a, b)) {
+            RECORD_VALUE(b);
+        }
+        macro(OP) = _RECORD_A + _FIRST + _RECORD_B;
+        """
+        with self.assertRaisesRegex(SyntaxError,
+                                    "must precede all "
+                                    "non-recording, non-specializing uops"):
+            self.run_cases_test(input, "")
+
+
+class TestRecorderTableGeneration(unittest.TestCase):
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.maxDiff = None
+        self.temp_dir = tempfile.gettempdir()
+        self.temp_input_filename = os.path.join(self.temp_dir, "input.txt")
+
+    def tearDown(self) -> None:
+        try:
+            os.remove(self.temp_input_filename)
+        except FileNotFoundError:
+            pass
+        super().tearDown()
+
+    def generate_tables(self, input: str) -> str:
+        import io
+        with open(self.temp_input_filename, "w+") as f:
+            f.write(parser.BEGIN_MARKER)
+            f.write(input)
+            f.write(parser.END_MARKER)
+        with handle_stderr():
+            analysis = analyze_files([self.temp_input_filename])
+        buf = io.StringIO()
+        out = CWriter(buf, 0, False)
+        record_function_generator.generate_recorder_tables(analysis, out)
+        return buf.getvalue()
+
+    def test_single_recording_uop_generates_count(self):
+        input = """
+        tier2 op(_RECORD_TOS, (value -- value)) {
+            RECORD_VALUE(value);
+        }
+        op(_DO_STUFF, (value -- res)) {
+            res = value;
+        }
+        macro(OP) = _RECORD_TOS + _DO_STUFF;
+        """
+        output = self.generate_tables(input)
+        self.assertIn("_RECORD_TOS_INDEX", output)
+        self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
+
+    def test_three_recording_uops_generate_count_3_in_order(self):
+        input = """
+        tier2 op(_RECORD_X, (a, b, c -- a, b, c)) {
+            RECORD_VALUE(a);
+        }
+        tier2 op(_RECORD_Y, (a, b, c -- a, b, c)) {
+            RECORD_VALUE(b);
+        }
+        tier2 op(_RECORD_Z, (a, b, c -- a, b, c)) {
+            RECORD_VALUE(c);
+        }
+        op(_DO_STUFF, (a, b, c -- res)) {
+            res = a;
+        }
+        macro(OP) = _RECORD_X + _RECORD_Y + _RECORD_Z + _DO_STUFF;
+        """
+        output = self.generate_tables(input)
+        self.assertIn(
+            "[OP] = {3, {_RECORD_X_INDEX, _RECORD_Y_INDEX, _RECORD_Z_INDEX}}",
+            output,
+        )
+
+    def test_four_recording_uops_rejected(self):
+        input = """
+        tier2 op(_RECORD_A, (a, b, c, d -- a, b, c, d)) {
+            RECORD_VALUE(a);
+        }
+        tier2 op(_RECORD_B, (a, b, c, d -- a, b, c, d)) {
+            RECORD_VALUE(b);
+        }
+        tier2 op(_RECORD_C, (a, b, c, d -- a, b, c, d)) {
+            RECORD_VALUE(c);
+        }
+        tier2 op(_RECORD_D, (a, b, c, d -- a, b, c, d)) {
+            RECORD_VALUE(d);
+        }
+        op(_DO_STUFF, (a, b, c, d -- res)) {
+            res = a;
+        }
+        macro(OP) = _RECORD_A + _RECORD_B + _RECORD_C + _RECORD_D + _DO_STUFF;
+        """
+        with self.assertRaisesRegex(ValueError, "exceeds MAX_RECORDED_VALUES"):
+            self.generate_tables(input)
+
 
 class TestGeneratedAbstractCases(unittest.TestCase):
     def setUp(self) -> None:
index 041adcff388a41ed8a69a7b7e8f0ea04a30edf85..8bf751026bd35c7d6d98ce1b2ba9be7d207cc965 100644 (file)
                 }
                 DISPATCH();
             }
-            _PyFrame_SetStackPointer(frame, stack_pointer);
-            Py_CLEAR(tracer->prev_state.recorded_value);
-            stack_pointer = _PyFrame_GetStackPointer(frame);
+            for (int i = 0; i < tracer->prev_state.recorded_count; i++) {
+                _PyFrame_SetStackPointer(frame, stack_pointer);
+                Py_CLEAR(tracer->prev_state.recorded_values[i]);
+                stack_pointer = _PyFrame_GetStackPointer(frame);
+            }
+            tracer->prev_state.recorded_count = 0;
             tracer->prev_state.instr = next_instr;
             PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
             if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) {
             if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
                 (&next_instr[1])->counter = trigger_backoff_counter();
             }
-            uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode];
-            if (record_func_index) {
-                _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index];
-                doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
+            const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode];
+            for (int i = 0; i < record_entry->count; i++) {
+                _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]];
+                doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
             }
+            tracer->prev_state.recorded_count = record_entry->count;
             DISPATCH_GOTO_NON_TRACING();
             #else
             (void)prev_instr;
index b6526b08c0a816f6317e081deb2796481540407a..57731fc65ed35c3bcd4eb37b0b185f4d73b5116d 100644 (file)
@@ -6349,7 +6349,10 @@ dummy_func(
                 ERROR_IF(err < 0);
                 DISPATCH();
             }
-            Py_CLEAR(tracer->prev_state.recorded_value);
+            for (int i = 0; i < tracer->prev_state.recorded_count; i++) {
+                Py_CLEAR(tracer->prev_state.recorded_values[i]);
+            }
+            tracer->prev_state.recorded_count = 0;
             tracer->prev_state.instr = next_instr;
             PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
             if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) {
@@ -6363,11 +6366,12 @@ dummy_func(
                 (&next_instr[1])->counter = trigger_backoff_counter();
             }
 
-            uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode];
-            if (record_func_index) {
-                _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index];
-                doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
+            const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode];
+            for (int i = 0; i < record_entry->count; i++) {
+                _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]];
+                doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
             }
+            tracer->prev_state.recorded_count = record_entry->count;
             DISPATCH_GOTO_NON_TRACING();
 #else
             (void)prev_instr;
index ae01925077c29eeef484a5cf5ff1f2acb23c1bc2..d710e82a306ab440902ec196e1034121fbe77ad5 100644 (file)
                 }
                 DISPATCH();
             }
-            _PyFrame_SetStackPointer(frame, stack_pointer);
-            Py_CLEAR(tracer->prev_state.recorded_value);
-            stack_pointer = _PyFrame_GetStackPointer(frame);
+            for (int i = 0; i < tracer->prev_state.recorded_count; i++) {
+                _PyFrame_SetStackPointer(frame, stack_pointer);
+                Py_CLEAR(tracer->prev_state.recorded_values[i]);
+                stack_pointer = _PyFrame_GetStackPointer(frame);
+            }
+            tracer->prev_state.recorded_count = 0;
             tracer->prev_state.instr = next_instr;
             PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
             if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) {
             if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
                 (&next_instr[1])->counter = trigger_backoff_counter();
             }
-            uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode];
-            if (record_func_index) {
-                _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index];
-                doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
+            const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode];
+            for (int i = 0; i < record_entry->count; i++) {
+                _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]];
+                doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
             }
+            tracer->prev_state.recorded_count = record_entry->count;
             DISPATCH_GOTO_NON_TRACING();
             #else
             (void)prev_instr;
index f09bf778587b123927f240f7b910dfe6fd7730ef..5d5aecda4e45e382929d549fb411abe73bcb0d90 100644 (file)
@@ -866,6 +866,7 @@ _PyJit_translate_single_bytecode_to_trace(
             assert(nuops > 0);
             uint32_t orig_oparg = oparg;  // For OPARG_TOP/BOTTOM
             uint32_t orig_target = target;
+            int record_idx = 0;
             for (int i = 0; i < nuops; i++) {
                 oparg = orig_oparg;
                 target = orig_target;
@@ -946,8 +947,9 @@ _PyJit_translate_single_bytecode_to_trace(
                     operand = next->op.arg;
                 }
                 else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) {
-                    PyObject *recorded_value = tracer->prev_state.recorded_value;
-                    tracer->prev_state.recorded_value = NULL;
+                    PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx];
+                    tracer->prev_state.recorded_values[record_idx] = NULL;
+                    record_idx++;
                     operand = (uintptr_t)recorded_value;
                 }
                 // All other instructions
@@ -1060,12 +1062,16 @@ _PyJit_TryInitializeTracing(
     tracer->prev_state.instr_frame = frame;
     tracer->prev_state.instr_oparg = oparg;
     tracer->prev_state.instr_stacklevel = tracer->initial_state.stack_depth;
-    tracer->prev_state.recorded_value = NULL;
-    uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[curr_instr->op.code];
-    if (record_func_index) {
-        _Py_RecordFuncPtr record_func = _PyOpcode_RecordFunctions[record_func_index];
-        record_func(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
+    tracer->prev_state.recorded_count = 0;
+    for (int i = 0; i < MAX_RECORDED_VALUES; i++) {
+        tracer->prev_state.recorded_values[i] = NULL;
     }
+    const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[curr_instr->op.code];
+    for (int i = 0; i < record_entry->count; i++) {
+        _Py_RecordFuncPtr record_func = _PyOpcode_RecordFunctions[record_entry->indices[i]];
+        record_func(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
+    }
+    tracer->prev_state.recorded_count = record_entry->count;
     assert(curr_instr->op.code == JUMP_BACKWARD_JIT || curr_instr->op.code == RESUME_CHECK_JIT || (exit != NULL));
     tracer->initial_state.jump_backward_instr = curr_instr;
 
@@ -1117,7 +1123,10 @@ _PyJit_FinalizeTracing(PyThreadState *tstate, int err)
     Py_CLEAR(tracer->initial_state.func);
     Py_CLEAR(tracer->initial_state.executor);
     Py_CLEAR(tracer->prev_state.instr_code);
-    Py_CLEAR(tracer->prev_state.recorded_value);
+    for (int i = 0; i < MAX_RECORDED_VALUES; i++) {
+        Py_CLEAR(tracer->prev_state.recorded_values[i]);
+    }
+    tracer->prev_state.recorded_count = 0;
     uop_buffer_init(buffer, &tracer->uop_array[0], UOP_MAX_TRACE_LENGTH);
     tracer->is_tracing = false;
 }
index db03374b62f7badef682ad4d2faed8e9170c45a0..25bca4735726fd15a4436083981c55629779f74f 100644 (file)
@@ -99,41 +99,42 @@ void _PyOpcode_RecordFunction_CODE(_PyInterpreterFrame *frame, _PyStackRef *stac
 #define _RECORD_BOUND_METHOD_INDEX 6
 #define _RECORD_CALLABLE_KW_INDEX 7
 #define _RECORD_4OS_INDEX 8
-const uint8_t _PyOpcode_RecordFunctionIndices[256] = {
-        [TO_BOOL_ALWAYS_TRUE] = _RECORD_TOS_TYPE_INDEX,
-        [BINARY_OP_SUBSCR_GETITEM] = _RECORD_NOS_INDEX,
-        [SEND_GEN] = _RECORD_3OS_GEN_FUNC_INDEX,
-        [LOAD_SUPER_ATTR_METHOD] = _RECORD_NOS_INDEX,
-        [LOAD_ATTR_INSTANCE_VALUE] = _RECORD_TOS_TYPE_INDEX,
-        [LOAD_ATTR_WITH_HINT] = _RECORD_TOS_TYPE_INDEX,
-        [LOAD_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX,
-        [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = _RECORD_TOS_TYPE_INDEX,
-        [LOAD_ATTR_PROPERTY] = _RECORD_TOS_TYPE_INDEX,
-        [STORE_ATTR_INSTANCE_VALUE] = _RECORD_TOS_TYPE_INDEX,
-        [STORE_ATTR_WITH_HINT] = _RECORD_TOS_TYPE_INDEX,
-        [STORE_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX,
-        [FOR_ITER_GEN] = _RECORD_NOS_GEN_FUNC_INDEX,
-        [LOAD_ATTR_METHOD_WITH_VALUES] = _RECORD_TOS_TYPE_INDEX,
-        [LOAD_ATTR_METHOD_NO_DICT] = _RECORD_TOS_TYPE_INDEX,
-        [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = _RECORD_TOS_TYPE_INDEX,
-        [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = _RECORD_TOS_TYPE_INDEX,
-        [LOAD_ATTR_METHOD_LAZY_DICT] = _RECORD_TOS_TYPE_INDEX,
-        [CALL_PY_GENERAL] = _RECORD_CALLABLE_INDEX,
-        [CALL_BOUND_METHOD_GENERAL] = _RECORD_BOUND_METHOD_INDEX,
-        [CALL_NON_PY_GENERAL] = _RECORD_CALLABLE_INDEX,
-        [CALL_BOUND_METHOD_EXACT_ARGS] = _RECORD_BOUND_METHOD_INDEX,
-        [CALL_PY_EXACT_ARGS] = _RECORD_CALLABLE_INDEX,
-        [CALL_ALLOC_AND_ENTER_INIT] = _RECORD_CALLABLE_INDEX,
-        [CALL_BUILTIN_CLASS] = _RECORD_CALLABLE_INDEX,
-        [CALL_BUILTIN_O] = _RECORD_CALLABLE_INDEX,
-        [CALL_BUILTIN_FAST] = _RECORD_CALLABLE_INDEX,
-        [CALL_BUILTIN_FAST_WITH_KEYWORDS] = _RECORD_CALLABLE_INDEX,
-        [CALL_METHOD_DESCRIPTOR_O] = _RECORD_CALLABLE_INDEX,
-        [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = _RECORD_CALLABLE_INDEX,
-        [CALL_METHOD_DESCRIPTOR_NOARGS] = _RECORD_CALLABLE_INDEX,
-        [CALL_KW_PY] = _RECORD_CALLABLE_KW_INDEX,
-        [CALL_KW_BOUND_METHOD] = _RECORD_CALLABLE_KW_INDEX,
-        [CALL_EX_PY] = _RECORD_4OS_INDEX,
+
+const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
+        [TO_BOOL_ALWAYS_TRUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBSCR_GETITEM] = {1, {_RECORD_NOS_INDEX}},
+        [SEND_GEN] = {1, {_RECORD_3OS_GEN_FUNC_INDEX}},
+        [LOAD_SUPER_ATTR_METHOD] = {1, {_RECORD_NOS_INDEX}},
+        [LOAD_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_PROPERTY] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [STORE_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [STORE_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [STORE_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [FOR_ITER_GEN] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
+        [LOAD_ATTR_METHOD_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_METHOD_NO_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_METHOD_LAZY_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [CALL_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_BOUND_METHOD_GENERAL] = {1, {_RECORD_BOUND_METHOD_INDEX}},
+        [CALL_NON_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_BOUND_METHOD_EXACT_ARGS] = {1, {_RECORD_BOUND_METHOD_INDEX}},
+        [CALL_PY_EXACT_ARGS] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_ALLOC_AND_ENTER_INIT] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_BUILTIN_CLASS] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_BUILTIN_O] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_BUILTIN_FAST] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_BUILTIN_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_METHOD_DESCRIPTOR_O] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_METHOD_DESCRIPTOR_NOARGS] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_KW_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}},
+        [CALL_KW_BOUND_METHOD] = {1, {_RECORD_CALLABLE_KW_INDEX}},
+        [CALL_EX_PY] = {1, {_RECORD_4OS_INDEX}},
 };
 
 const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[9] = {
index 100de4c7250907c566b1d2a90f91c64872c62c0a..414ca18be4654cf17bd5eba8a590367af88d0433 100644 (file)
@@ -1132,9 +1132,7 @@ def add_macro(
     macro: parser.Macro, instructions: dict[str, Instruction], uops: dict[str, Uop]
 ) -> None:
     parts: list[Part] = []
-    # Track the last non-specializing uop seen, so that recording uops
-    # can follow specializing ones without triggering the position check.
-    prev_uop: Uop | None = None
+    seen_real_uop = False
     for part in macro.uops:
         match part:
             case parser.OpName():
@@ -1146,14 +1144,15 @@ def add_macro(
                             f"No Uop named {part.name}", macro.tokens[0]
                         )
                     uop = uops[part.name]
-                    if uop.properties.records_value and prev_uop is not None:
-                        raise analysis_error(
-                            f"Recording uop {part.name} is not allowed "
-                            f"after non-specializing uops in macro",
-                            macro.tokens[0])
+                    if uop.properties.records_value:
+                        if seen_real_uop:
+                            raise analysis_error(
+                                f"Recording uop {part.name} must precede all "
+                                f"non-recording, non-specializing uops in macro",
+                                macro.tokens[0])
+                    elif "specializing" not in uop.annotations:
+                        seen_real_uop = True
                     parts.append(uop)
-                    if "specializing" not in uop.annotations:
-                        prev_uop = uop
             case parser.CacheEffect():
                 parts.append(Skip(part.size))
             case _:
index 58d948f198c4dc8c7450b9f0c243c779231b6230..d7ae0ebf79fe6257d269734ba0ebb56258b0694c 100644 (file)
@@ -25,6 +25,9 @@ from stack import Stack, Storage
 
 DEFAULT_OUTPUT = ROOT / "Python/recorder_functions.c.h"
 
+# Must match MAX_RECORDED_VALUES in Include/internal/pycore_optimizer.h.
+MAX_RECORDED_VALUES = 3
+
 
 class RecorderEmitter(Emitter):
     def __init__(self, out: CWriter):
@@ -81,27 +84,35 @@ def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: C
 
 def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None:
     record_function_indexes: dict[str, int] = dict()
-    record_table: dict[str, str] = {}
+    record_table: dict[str, list[str]] = {}
     index = 1
     for inst in analysis.instructions.values():
         if not inst.properties.records_value:
             continue
+        records: list[str] = []
         for part in inst.parts:
             if not part.properties.records_value:
                 continue
             if part.name not in record_function_indexes:
                 record_function_indexes[part.name] = index
                 index += 1
-            record_table[inst.name] = part.name
-            break
+            records.append(part.name)
+        if records:
+            if len(records) > MAX_RECORDED_VALUES:
+                raise ValueError(
+                    f"Instruction {inst.name} has {len(records)} recording ops, "
+                    f"exceeds MAX_RECORDED_VALUES ({MAX_RECORDED_VALUES})"
+                )
+            record_table[inst.name] = records
     func_count = len(record_function_indexes)
 
     for name, index in record_function_indexes.items():
         out.emit(f"#define {name}_INDEX {index}\n")
-    args = "_PyJitTracerState *tracer, _PyInterpreterFrame *frame, _PyStackRef *stackpointer, int oparg"
-    out.emit("const uint8_t _PyOpcode_RecordFunctionIndices[256] = {\n")
-    for inst_name, record_name in record_table.items():
-        out.emit(f"    [{inst_name}] = {record_name}_INDEX,\n")
+    out.emit("\n")
+    out.emit("const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {\n")
+    for inst_name, record_names in record_table.items():
+        indices = ", ".join(f"{name}_INDEX" for name in record_names)
+        out.emit(f"    [{inst_name}] = {{{len(record_names)}, {{{indices}}}}},\n")
     out.emit("};\n\n")
     out.emit(f"const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[{func_count+1}] = {{\n")
     out.emit("    [0] = NULL,\n")