git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-148571: [JIT] Preserve family-head recorder layouts for specialized opcode families...
author Hai Zhu <haiizhu@outlook.com>
Tue, 28 Apr 2026 12:41:16 +0000 (20:41 +0800)
committer GitHub <noreply@github.com>
Tue, 28 Apr 2026 12:41:16 +0000 (13:41 +0100)
* Records the same objects for every member of a family before execution
* Records derived values when recording the trace
* This ensures that specialization or deoptimization does not cause invalid values to be recorded

Include/internal/pycore_optimizer.h
Lib/test/test_capi/test_opt.py
Lib/test/test_generated_cases.py
Misc/NEWS.d/next/Core_and_Builtins/2026-04-18-16-41-04.gh-issue-148571.Q6WB3A.rst [new file with mode: 0644]
Python/optimizer.c
Python/record_functions.c.h
Tools/cases_generator/record_function_generator.py

index 7c2e0e95a80c3fc89b7e61af6d72d231846ab053..f356d60ae5c7a75c3cb310c12b4573bca4e63769 100644 (file)
@@ -534,7 +534,21 @@ typedef struct {
     uint8_t count;
     uint8_t indices[MAX_RECORDED_VALUES];
 } _PyOpcodeRecordEntry;
+
+typedef struct {
+    uint8_t count;
+    uint8_t transform_mask;
+    uint8_t slots[MAX_RECORDED_VALUES];
+} _PyOpcodeRecordSlotMap;
+
 PyAPI_DATA(const _PyOpcodeRecordEntry) _PyOpcode_RecordEntries[256];
+PyAPI_DATA(const _PyOpcodeRecordSlotMap) _PyOpcode_RecordSlotMaps[256];
+
+/* Convert a family-recorded value to the form a recorder uop expects.
+ * If no transform is needed, return the input value unchanged.
+ * Takes ownership of `value` and returns a new strong reference or NULL.
+ */
+PyAPI_FUNC(PyObject *) _PyOpcode_RecordTransformValue(int uop, PyObject *value);
 #endif
 
 #ifdef __cplusplus
index 39075fc64cf02bf983629e80d1b4b0016f6638af..b37c35495983c31ce2520c7a3333912cbcbcfd03 100644 (file)
@@ -5849,6 +5849,19 @@ class TestUopsOptimization(unittest.TestCase):
         self.assertNotIn("_LOAD_SUPER_ATTR_METHOD", uops)
         self.assertEqual(uops.count("_GUARD_NOS_TYPE_VERSION"), 2)
 
+    def test_settrace_then_polymorphic_call_does_not_crash(self):
+        script_helper.assert_python_ok("-c", textwrap.dedent("""
+            import sys
+            sys.settrace(lambda *_: None)
+            sys.settrace(None)
+
+            class C:
+                def __init__(self, x):
+                    pass
+
+            for i in 0, 1, 0, 1:
+                C(0) if i else str(0)
+        """))
 
 def global_identity(x):
     return x
index 62cf0c0c6af0b2244bbc6083beecf23ea764fb7e..748309b54593a182a2fe481a769678e373808680 100644 (file)
@@ -2074,19 +2074,33 @@ class TestRecorderTableGeneration(unittest.TestCase):
             pass
         super().tearDown()
 
-    def generate_tables(self, input: str) -> str:
-        import io
+    def analyze_input(self, input: str):
         with open(self.temp_input_filename, "w+") as f:
             f.write(parser.BEGIN_MARKER)
             f.write(input)
             f.write(parser.END_MARKER)
         with handle_stderr():
-            analysis = analyze_files([self.temp_input_filename])
+            return analyze_files([self.temp_input_filename])
+
+    def generate_tables(self, input: str) -> str:
+        import io
+        analysis = self.analyze_input(input)
         buf = io.StringIO()
         out = CWriter(buf, 0, False)
         record_function_generator.generate_recorder_tables(analysis, out)
         return buf.getvalue()
 
+    def get_slot_map_section(self, output: str) -> str:
+        return output.split(
+            "const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {\n",
+            1,
+        )[1].split("};\n\n", 1)[0]
+
+    def assert_slot_map_lines(self, output: str, *lines: str) -> None:
+        slot_map_section = self.get_slot_map_section(output)
+        for line in lines:
+            self.assertIn(line, slot_map_section)
+
     def test_single_recording_uop_generates_count(self):
         input = """
         tier2 op(_RECORD_TOS, (value -- value)) {
@@ -2145,6 +2159,173 @@ class TestRecorderTableGeneration(unittest.TestCase):
         with self.assertRaisesRegex(ValueError, "exceeds MAX_RECORDED_VALUES"):
             self.generate_tables(input)
 
+    def test_family_member_needs_transform_only_when_shape_changes(self):
+        input = """
+        tier2 op(_RECORD_TOS, (value -- value)) {
+            RECORD_VALUE(value);
+        }
+        tier2 op(_RECORD_TOS_TYPE, (value -- value)) {
+            RECORD_VALUE(Py_TYPE(value));
+        }
+        op(_DO_STUFF, (value -- res)) {
+            res = value;
+        }
+        macro(OP_RAW) = _RECORD_TOS + _DO_STUFF;
+        macro(OP_RAW_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+        family(OP_RAW, INLINE_CACHE_ENTRIES_OP_RAW) = { OP_RAW_SPECIALIZED };
+
+        macro(OP_TYPED) = _RECORD_TOS_TYPE + _DO_STUFF;
+        macro(OP_TYPED_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+        family(OP_TYPED, INLINE_CACHE_ENTRIES_OP_TYPED) = { OP_TYPED_SPECIALIZED };
+        """
+        output = self.generate_tables(input)
+        self.assert_slot_map_lines(
+            output,
+            "[OP_RAW] = {1, 1, {0}}",
+            "[OP_RAW_SPECIALIZED] = {1, 0, {0}}",
+            "[OP_TYPED] = {1, 0, {0}}",
+            "[OP_TYPED_SPECIALIZED] = {1, 0, {0}}",
+        )
+
+    def test_family_member_maps_positional_recorders_to_family_slots(self):
+        input = """
+        tier2 op(_RECORD_TOS, (sub -- sub)) {
+            RECORD_VALUE(sub);
+        }
+        tier2 op(_RECORD_NOS, (container, sub -- container, sub)) {
+            RECORD_VALUE(container);
+        }
+        op(_DO_STUFF, (container, sub -- res)) {
+            res = container;
+        }
+        macro(OP) = _RECORD_TOS + _RECORD_NOS + _DO_STUFF;
+        macro(OP_SPECIALIZED) = _RECORD_NOS + _DO_STUFF;
+        family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+        """
+        output = self.generate_tables(input)
+        self.assert_slot_map_lines(
+            output,
+            "[OP] = {2, 0, {1, 0}}",
+            "[OP_SPECIALIZED] = {1, 0, {0}}",
+        )
+
+    def test_family_member_maps_non_positional_recorders_by_stack_shape(self):
+        input = """
+        tier2 op(_RECORD_CALLABLE, (callable, self, args[oparg] -- callable, self, args[oparg])) {
+            RECORD_VALUE(callable);
+        }
+        tier2 op(_RECORD_BOUND_METHOD, (callable, self, args[oparg] -- callable, self, args[oparg])) {
+            RECORD_VALUE(callable);
+        }
+        op(_DO_STUFF, (callable, self, args[oparg] -- res)) {
+            res = callable;
+        }
+        macro(OP) = _RECORD_CALLABLE + _DO_STUFF;
+        macro(OP_SPECIALIZED) = _RECORD_BOUND_METHOD + _DO_STUFF;
+        family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+        """
+        output = self.generate_tables(input)
+        self.assert_slot_map_lines(
+            output,
+            "[OP] = {1, 1, {0}}",
+            "[OP_SPECIALIZED] = {1, 0, {0}}",
+        )
+
+    def test_family_head_records_union_of_member_recorders(self):
+        input = """
+        tier2 op(_RECORD_TOS, (value -- value)) {
+            RECORD_VALUE(value);
+        }
+        op(_DO_STUFF, (value -- res)) {
+            res = value;
+        }
+        macro(OP) = _DO_STUFF;
+        macro(OP_SPECIALIZED) = _RECORD_TOS + _DO_STUFF;
+        family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+        """
+        output = self.generate_tables(input)
+        self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
+        self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
+        self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {0}}")
+
+    def test_family_detects_base_and_specialized_recording_difference(self):
+        input = """
+        tier2 op(_RECORD_TOS, (value -- value)) {
+            RECORD_VALUE(value);
+        }
+        tier2 op(_RECORD_TOS_TYPE, (value -- value)) {
+            RECORD_VALUE(Py_TYPE(value));
+        }
+        op(_DO_STUFF, (value -- res)) {
+            res = value;
+        }
+        macro(OP) = _RECORD_TOS + _DO_STUFF;
+        macro(OP_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+        family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+        """
+        analysis = self.analyze_input(input)
+        output = self.generate_tables(input)
+        self.assertEqual(
+            record_function_generator.get_instruction_record_names(
+                analysis.instructions["OP"]
+            ),
+            ["_RECORD_TOS"],
+        )
+        self.assertEqual(
+            record_function_generator.get_instruction_record_names(
+                analysis.instructions["OP_SPECIALIZED"]
+            ),
+            ["_RECORD_TOS_TYPE"],
+        )
+        self.assertIn("[OP] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
+        self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
+        self.assert_slot_map_lines(
+            output,
+            "[OP] = {1, 1, {0}}",
+            "[OP_SPECIALIZED] = {1, 0, {0}}",
+        )
+
+    def test_family_head_falls_back_for_missing_member_slots(self):
+        input = """
+        tier2 op(_RECORD_TOS, (value -- value)) {
+            RECORD_VALUE(value);
+        }
+        op(_DO_STUFF, (value -- res)) {
+            res = value;
+        }
+        macro(OP) = _RECORD_TOS + _DO_STUFF;
+        macro(OP_SPECIALIZED) = _DO_STUFF;
+        family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+        """
+        output = self.generate_tables(input)
+        self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
+        self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
+
+    def test_family_mixed_slots_only_transform_changed_recorders(self):
+        input = """
+        tier2 op(_RECORD_TOS_TYPE, (left, right -- left, right)) {
+            RECORD_VALUE(Py_TYPE(right));
+        }
+        tier2 op(_RECORD_NOS_TYPE, (left, right -- left, right)) {
+            RECORD_VALUE(Py_TYPE(left));
+        }
+        tier2 op(_RECORD_NOS, (left, right -- left, right)) {
+            RECORD_VALUE(left);
+        }
+        op(_DO_STUFF, (left, right -- res)) {
+            res = left;
+        }
+        macro(OP) = _RECORD_TOS_TYPE + _RECORD_NOS_TYPE + _DO_STUFF;
+        macro(OP_SPECIALIZED) = _RECORD_NOS + _DO_STUFF;
+        family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+        """
+        output = self.generate_tables(input)
+        self.assertIn("[OP] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}", output)
+        self.assert_slot_map_lines(
+            output,
+            "[OP] = {2, 2, {1, 0}}",
+            "[OP_SPECIALIZED] = {1, 0, {0}}",
+        )
 
 class TestGeneratedAbstractCases(unittest.TestCase):
     def setUp(self) -> None:
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-18-16-41-04.gh-issue-148571.Q6WB3A.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-18-16-41-04.gh-issue-148571.Q6WB3A.rst
new file mode 100644 (file)
index 0000000..70eeada
--- /dev/null
@@ -0,0 +1 @@
+Fix a crash in the JIT optimizer when specialized opcode families inherited incompatible recorded operand layouts.
index 2ce4da0910f3c49642aa9147bbc98a7ab0f1fecd..820a0771a2cb0fd0fdcf9a7d0d6cfb35450aad1d 100644 (file)
@@ -660,6 +660,44 @@ is_terminator(const _PyUOpInstruction *uop)
     );
 }
 
+static PyObject *
+record_trace_transform_to_type(PyObject *value)
+{
+    PyObject *tp = Py_NewRef((PyObject *)Py_TYPE(value));
+    Py_DECREF(value);
+    return tp;
+}
+
+/* _RECORD_NOS_GEN_FUNC and _RECORD_3OS_GEN_FUNC record the raw receiver.
+ * If it is a generator, return its function object; otherwise return NULL.
+ */
+static PyObject *
+record_trace_transform_gen_func(PyObject *value)
+{
+    PyObject *func = NULL;
+    if (PyGen_Check(value)) {
+        _PyStackRef f = ((PyGenObject *)value)->gi_iframe.f_funcobj;
+        if (!PyStackRef_IsNull(f)) {
+            func = Py_NewRef(PyStackRef_AsPyObjectBorrow(f));
+        }
+    }
+    Py_DECREF(value);
+    return func;
+}
+
+/* _RECORD_BOUND_METHOD records the raw callable.
+ * Keep it only for bound methods; otherwise return NULL.
+ */
+static PyObject *
+record_trace_transform_bound_method(PyObject *value)
+{
+    if (Py_TYPE(value) == &PyMethod_Type) {
+        return value;
+    }
+    Py_DECREF(value);
+    return NULL;
+}
+
 /* Returns 1 on success (added to trace), 0 on trace end.
  */
 // gh-142543: inlining this function causes stack overflows
@@ -833,6 +871,8 @@ _PyJit_translate_single_bytecode_to_trace(
     // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
     trace->end -= 2;
 
+    const _PyOpcodeRecordSlotMap *record_slot_map = &_PyOpcode_RecordSlotMaps[opcode];
+
     assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
     assert(!_PyErr_Occurred(tstate));
 
@@ -1029,8 +1069,15 @@ _PyJit_translate_single_bytecode_to_trace(
                     }
                 }
                 else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) {
-                    PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx];
-                    tracer->prev_state.recorded_values[record_idx] = NULL;
+                    assert(record_idx < record_slot_map->count);
+                    uint8_t record_slot = record_slot_map->slots[record_idx];
+                    assert(record_slot < tracer->prev_state.recorded_count);
+                    PyObject *recorded_value = tracer->prev_state.recorded_values[record_slot];
+                    tracer->prev_state.recorded_values[record_slot] = NULL;
+                    if ((record_slot_map->transform_mask & (1u << record_idx)) &&
+                        recorded_value != NULL) {
+                        recorded_value = _PyOpcode_RecordTransformValue(uop, recorded_value);
+                    }
                     record_idx++;
                     operand = (uintptr_t)recorded_value;
                 }
index dff13bfb45e5b0a429bec14b7727ba300b24d6a3..504f6e1d9901c3400151a81105dc94b26d99b355 100644 (file)
@@ -103,19 +103,45 @@ void _PyOpcode_RecordFunction_CODE(_PyInterpreterFrame *frame, _PyStackRef *stac
 #define _RECORD_3OS_GEN_FUNC_INDEX 3
 #define _RECORD_NOS_GEN_FUNC_INDEX 4
 #define _RECORD_CALLABLE_INDEX 5
-#define _RECORD_BOUND_METHOD_INDEX 6
-#define _RECORD_CALLABLE_KW_INDEX 7
-#define _RECORD_4OS_INDEX 8
-#define _RECORD_NOS_TYPE_INDEX 9
+#define _RECORD_CALLABLE_KW_INDEX 6
+#define _RECORD_4OS_INDEX 7
 
 const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
+        [TO_BOOL_BOOL] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [TO_BOOL_NONE] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_SUPER_ATTR_ATTR] = {1, {_RECORD_NOS_INDEX}},
+        [TO_BOOL] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [TO_BOOL_INT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [TO_BOOL_LIST] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [TO_BOOL_STR] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [TO_BOOL_ALWAYS_TRUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
-        [BINARY_OP_SUBSCR_GETITEM] = {1, {_RECORD_NOS_INDEX}},
+        [BINARY_OP_MULTIPLY_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_ADD_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBTRACT_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_MULTIPLY_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_ADD_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBTRACT_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_ADD_UNICODE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_EXTEND] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_INPLACE_ADD_UNICODE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBSCR_LIST_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBSCR_LIST_SLICE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBSCR_STR_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBSCR_USTR_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBSCR_TUPLE_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBSCR_DICT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [BINARY_OP_SUBSCR_GETITEM] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+        [SEND] = {1, {_RECORD_3OS_GEN_FUNC_INDEX}},
         [SEND_GEN] = {1, {_RECORD_3OS_GEN_FUNC_INDEX}},
+        [STORE_ATTR] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_SUPER_ATTR] = {1, {_RECORD_NOS_INDEX}},
         [LOAD_SUPER_ATTR_METHOD] = {1, {_RECORD_NOS_INDEX}},
+        [LOAD_ATTR] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [LOAD_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_MODULE] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [LOAD_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [LOAD_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [LOAD_ATTR_CLASS] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [LOAD_ATTR_PROPERTY] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = {1, {_RECORD_TOS_TYPE_INDEX}},
@@ -125,6 +151,11 @@ const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
         [GET_ITER] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [GET_ITER_SELF] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [GET_ITER_VIRTUAL] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [FOR_ITER] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
+        [FOR_ITER_VIRTUAL] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
+        [FOR_ITER_LIST] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
+        [FOR_ITER_TUPLE] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
+        [FOR_ITER_RANGE] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
         [FOR_ITER_GEN] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
         [LOAD_SPECIAL] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [LOAD_ATTR_METHOD_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}},
@@ -132,34 +163,104 @@ const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
         [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
         [LOAD_ATTR_METHOD_LAZY_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+        [CALL] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
-        [CALL_BOUND_METHOD_GENERAL] = {1, {_RECORD_BOUND_METHOD_INDEX}},
+        [CALL_BOUND_METHOD_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_NON_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
-        [CALL_BOUND_METHOD_EXACT_ARGS] = {1, {_RECORD_BOUND_METHOD_INDEX}},
+        [CALL_BOUND_METHOD_EXACT_ARGS] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_PY_EXACT_ARGS] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_TYPE_1] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_STR_1] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_TUPLE_1] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_ALLOC_AND_ENTER_INIT] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_BUILTIN_CLASS] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_BUILTIN_O] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_BUILTIN_FAST] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_BUILTIN_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_LEN] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_ISINSTANCE] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_LIST_APPEND] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_METHOD_DESCRIPTOR_O] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_METHOD_DESCRIPTOR_NOARGS] = {1, {_RECORD_CALLABLE_INDEX}},
+        [CALL_METHOD_DESCRIPTOR_FAST] = {1, {_RECORD_CALLABLE_INDEX}},
         [CALL_KW_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}},
         [CALL_KW_BOUND_METHOD] = {1, {_RECORD_CALLABLE_KW_INDEX}},
+        [CALL_KW] = {1, {_RECORD_CALLABLE_KW_INDEX}},
+        [CALL_KW_NON_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}},
+        [CALL_FUNCTION_EX] = {1, {_RECORD_4OS_INDEX}},
         [CALL_EX_PY] = {1, {_RECORD_4OS_INDEX}},
-        [BINARY_OP] = {2, {_RECORD_TOS_TYPE_INDEX, _RECORD_NOS_TYPE_INDEX}},
+        [CALL_EX_NON_PY_GENERAL] = {1, {_RECORD_4OS_INDEX}},
+        [BINARY_OP] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
 };
 
-const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[10] = {
+const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {
+        [TO_BOOL_ALWAYS_TRUE] = {1, 0, {0}},
+        [BINARY_OP_SUBSCR_GETITEM] = {1, 0, {0}},
+        [SEND_GEN] = {1, 0, {0}},
+        [LOAD_SUPER_ATTR_METHOD] = {1, 0, {0}},
+        [LOAD_ATTR_INSTANCE_VALUE] = {1, 0, {0}},
+        [LOAD_ATTR_WITH_HINT] = {1, 0, {0}},
+        [LOAD_ATTR_SLOT] = {1, 0, {0}},
+        [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, 0, {0}},
+        [LOAD_ATTR_PROPERTY] = {1, 0, {0}},
+        [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = {1, 0, {0}},
+        [STORE_ATTR_INSTANCE_VALUE] = {1, 0, {0}},
+        [STORE_ATTR_WITH_HINT] = {1, 0, {0}},
+        [STORE_ATTR_SLOT] = {1, 0, {0}},
+        [GET_ITER] = {1, 0, {0}},
+        [GET_ITER_SELF] = {1, 0, {0}},
+        [GET_ITER_VIRTUAL] = {1, 0, {0}},
+        [FOR_ITER_GEN] = {1, 0, {0}},
+        [LOAD_SPECIAL] = {1, 0, {0}},
+        [LOAD_ATTR_METHOD_WITH_VALUES] = {1, 0, {0}},
+        [LOAD_ATTR_METHOD_NO_DICT] = {1, 0, {0}},
+        [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = {1, 0, {0}},
+        [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = {1, 0, {0}},
+        [LOAD_ATTR_METHOD_LAZY_DICT] = {1, 0, {0}},
+        [CALL_PY_GENERAL] = {1, 0, {0}},
+        [CALL_BOUND_METHOD_GENERAL] = {1, 1, {0}},
+        [CALL_NON_PY_GENERAL] = {1, 0, {0}},
+        [CALL_BOUND_METHOD_EXACT_ARGS] = {1, 1, {0}},
+        [CALL_PY_EXACT_ARGS] = {1, 0, {0}},
+        [CALL_ALLOC_AND_ENTER_INIT] = {1, 0, {0}},
+        [CALL_BUILTIN_CLASS] = {1, 0, {0}},
+        [CALL_BUILTIN_O] = {1, 0, {0}},
+        [CALL_BUILTIN_FAST] = {1, 0, {0}},
+        [CALL_BUILTIN_FAST_WITH_KEYWORDS] = {1, 0, {0}},
+        [CALL_METHOD_DESCRIPTOR_O] = {1, 0, {0}},
+        [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = {1, 0, {0}},
+        [CALL_METHOD_DESCRIPTOR_NOARGS] = {1, 0, {0}},
+        [CALL_KW_PY] = {1, 0, {0}},
+        [CALL_KW_BOUND_METHOD] = {1, 0, {0}},
+        [CALL_EX_PY] = {1, 0, {0}},
+        [BINARY_OP] = {2, 2, {1, 0}},
+};
+
+const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[8] = {
         [0] = NULL,
         [_RECORD_TOS_TYPE_INDEX] = _PyOpcode_RecordFunction_TOS_TYPE,
         [_RECORD_NOS_INDEX] = _PyOpcode_RecordFunction_NOS,
         [_RECORD_3OS_GEN_FUNC_INDEX] = _PyOpcode_RecordFunction_3OS_GEN_FUNC,
         [_RECORD_NOS_GEN_FUNC_INDEX] = _PyOpcode_RecordFunction_NOS_GEN_FUNC,
         [_RECORD_CALLABLE_INDEX] = _PyOpcode_RecordFunction_CALLABLE,
-        [_RECORD_BOUND_METHOD_INDEX] = _PyOpcode_RecordFunction_BOUND_METHOD,
         [_RECORD_CALLABLE_KW_INDEX] = _PyOpcode_RecordFunction_CALLABLE_KW,
         [_RECORD_4OS_INDEX] = _PyOpcode_RecordFunction_4OS,
-        [_RECORD_NOS_TYPE_INDEX] = _PyOpcode_RecordFunction_NOS_TYPE,
 };
+
+PyObject *
+_PyOpcode_RecordTransformValue(int uop, PyObject *value)
+{
+        switch (uop) {
+                case _RECORD_TOS_TYPE:
+                case _RECORD_NOS_TYPE:
+                    return record_trace_transform_to_type(value);
+                case _RECORD_NOS_GEN_FUNC:
+                case _RECORD_3OS_GEN_FUNC:
+                    return record_trace_transform_gen_func(value);
+                case _RECORD_BOUND_METHOD:
+                    return record_trace_transform_bound_method(value);
+                default:
+                    return value;
+        }
+}
index d7ae0ebf79fe6257d269734ba0ebb56258b0694c..6f518ffdcf2ac206279313030b1eb7a2118ece17 100644 (file)
@@ -28,6 +28,21 @@ DEFAULT_OUTPUT = ROOT / "Python/recorder_functions.c.h"
 # Must match MAX_RECORDED_VALUES in Include/internal/pycore_optimizer.h.
 MAX_RECORDED_VALUES = 3
 
+# Map `_RECORD_*` uops to the helper that converts a raw family-recorded
+# value to the form the specialized member consumes.
+_RECORD_TRANSFORM_HELPERS: dict[str, str] = {
+    "_RECORD_TOS_TYPE": "record_trace_transform_to_type",
+    "_RECORD_NOS_TYPE": "record_trace_transform_to_type",
+    "_RECORD_NOS_GEN_FUNC": "record_trace_transform_gen_func",
+    "_RECORD_3OS_GEN_FUNC": "record_trace_transform_gen_func",
+    "_RECORD_BOUND_METHOD": "record_trace_transform_bound_method",
+}
+
+# Recorder uops whose slot kind differs from the leading word of their name.
+_RECORD_SLOT_KIND_OVERRIDES: dict[str, str] = {
+    "_RECORD_BOUND_METHOD": "CALLABLE",
+}
+
 
 class RecorderEmitter(Emitter):
     def __init__(self, out: CWriter):
@@ -52,9 +67,83 @@ class RecorderEmitter(Emitter):
         return True
 
 
+def get_record_slot_kind(record_name: str) -> str:
+    if record_name in _RECORD_SLOT_KIND_OVERRIDES:
+        return _RECORD_SLOT_KIND_OVERRIDES[record_name]
+    if not record_name.startswith("_RECORD_"):
+        return record_name
+    return record_name.removeprefix("_RECORD_").partition("_")[0]
+
+
+def get_instruction_record_names(inst: Instruction) -> list[str]:
+    return [part.name for part in inst.parts if part.properties.records_value]
+
+
+def get_family_record_names(
+    family_head: Instruction,
+    family_members: list[Instruction],
+    instruction_records: dict[str, list[str]],
+    record_slot_keys: dict[str, str],
+) -> list[str]:
+    member_records = [instruction_records[m.name] for m in family_members]
+    all_member_names = {n for names in member_records for n in names}
+    records: list[str] = []
+    slot_index: dict[str, int] = {}
+
+    def add(name: str) -> None:
+        kind = record_slot_keys[name]
+        # Prefer the raw recorder if any member uses it; otherwise the given form.
+        raw = f"_RECORD_{kind}"
+        source = raw if raw in all_member_names else name
+        existing = slot_index.get(kind)
+        if existing is None:
+            slot_index[kind] = len(records)
+            records.append(source)
+        elif records[existing] != source:
+            raise ValueError(
+                f"Family {family_head.name} has incompatible recorders for "
+                f"slot {kind}: {records[existing]} and {source}"
+            )
+
+    for names in member_records:
+        for name in names:
+            add(name)
+    # Family head supplies any slots no member exercises.
+    for name in instruction_records[family_head.name]:
+        if record_slot_keys[name] not in slot_index:
+            slot_index[record_slot_keys[name]] = len(records)
+            records.append(name)
+    return records
+
+
+def get_record_consumer_layout(
+    inst_name: str,
+    source_records: list[str],
+    own_records: list[str],
+    record_slot_keys: dict[str, str],
+) -> tuple[list[int], int]:
+    used = [False] * len(source_records)
+    slot_map: list[int] = []
+    transform_mask = 0
+    for i, own in enumerate(own_records):
+        own_kind = record_slot_keys[own]
+        for j, src in enumerate(source_records):
+            if not used[j] and record_slot_keys[src] == own_kind:
+                used[j] = True
+                slot_map.append(j)
+                if src != own:
+                    transform_mask |= 1 << i
+                break
+        else:
+            raise ValueError(
+                f"Instruction {inst_name} has no compatible family slot for "
+                f"{own} in {source_records}"
+            )
+    return slot_map, transform_mask
+
 def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: CWriter) -> None:
-    write_header(__file__, filenames, outfile)
-    outfile.write(
+    write_header(__file__, filenames, out.out)
+    out.out.write(
         """
 #ifdef TIER_ONE
     #error "This file is for Tier 2 only"
@@ -63,13 +152,10 @@ def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: C
     )
     args = "_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, int oparg, PyObject **recorded_value"
     emitter = RecorderEmitter(out)
-    func_count = 0
     nop = analysis.instructions["NOP"]
-    function_table: dict[str, int] = dict()
-    for name, uop in analysis.uops.items():
+    for uop in analysis.uops.values():
         if not uop.properties.records_value:
             continue
-        func_count += 1
         out.emit(f"void _PyOpcode_RecordFunction{uop.name[7:]}({args}) {{\n")
         seen = {"unused"}
         for var in uop.stack.inputs:
@@ -83,42 +169,109 @@ def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: C
         out.emit("\n\n")
 
 def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None:
-    record_function_indexes: dict[str, int] = dict()
+    instruction_records = {
+        inst.name: get_instruction_record_names(inst)
+        for inst in analysis.instructions.values()
+    }
+    record_uop_names = [
+        name for name, uop in analysis.uops.items() if uop.properties.records_value
+    ]
+    record_slot_keys = {name: get_record_slot_kind(name) for name in record_uop_names}
+    family_record_table = {
+        family.name: get_family_record_names(
+            analysis.instructions[family.name],
+            family.members,
+            instruction_records,
+            record_slot_keys,
+        )
+        for family in analysis.families.values()
+    }
+
     record_table: dict[str, list[str]] = {}
-    index = 1
+    record_consumer_table: dict[str, tuple[list[int], int]] = {}
+    record_function_indexes: dict[str, int] = {}
     for inst in analysis.instructions.values():
-        if not inst.properties.records_value:
+        own_records = instruction_records[inst.name]
+        # TRACE_RECORD runs before execution, but specialization may rewrite
+        # the opcode before translation. Record the raw family shape (union
+        # of head + members) so any opcode in the family can be translated
+        # from the same recorded layout.
+        family = inst.family or analysis.families.get(inst.name)
+        records = family_record_table[family.name] if family is not None else own_records
+        if not records:
             continue
-        records: list[str] = []
-        for part in inst.parts:
-            if not part.properties.records_value:
-                continue
-            if part.name not in record_function_indexes:
-                record_function_indexes[part.name] = index
-                index += 1
-            records.append(part.name)
-        if records:
-            if len(records) > MAX_RECORDED_VALUES:
-                raise ValueError(
-                    f"Instruction {inst.name} has {len(records)} recording ops, "
-                    f"exceeds MAX_RECORDED_VALUES ({MAX_RECORDED_VALUES})"
-                )
-            record_table[inst.name] = records
-    func_count = len(record_function_indexes)
+        if len(records) > MAX_RECORDED_VALUES:
+            raise ValueError(
+                f"Instruction {inst.name} has {len(records)} recording ops, "
+                f"exceeds MAX_RECORDED_VALUES ({MAX_RECORDED_VALUES})"
+            )
+        record_table[inst.name] = records
+        for name in records:
+            if name not in record_function_indexes:
+                record_function_indexes[name] = len(record_function_indexes) + 1
+        if own_records:
+            record_consumer_table[inst.name] = get_record_consumer_layout(
+                inst.name, records, own_records, record_slot_keys
+            )
 
     for name, index in record_function_indexes.items():
         out.emit(f"#define {name}_INDEX {index}\n")
     out.emit("\n")
+
     out.emit("const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {\n")
-    for inst_name, record_names in record_table.items():
-        indices = ", ".join(f"{name}_INDEX" for name in record_names)
-        out.emit(f"    [{inst_name}] = {{{len(record_names)}, {{{indices}}}}},\n")
+    for inst_name, records in record_table.items():
+        indices = ", ".join(f"{name}_INDEX" for name in records)
+        out.emit(f"    [{inst_name}] = {{{len(records)}, {{{indices}}}}},\n")
+    out.emit("};\n\n")
+
+    out.emit("const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {\n")
+    for inst_name, (slots, mask) in record_consumer_table.items():
+        slot_list = ", ".join(str(s) for s in slots)
+        out.emit(
+            f"    [{inst_name}] = {{{len(slots)}, {mask}, {{{slot_list}}}}},\n"
+        )
     out.emit("};\n\n")
-    out.emit(f"const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[{func_count+1}] = {{\n")
+
+    out.emit(
+        f"const _Py_RecordFuncPtr _PyOpcode_RecordFunctions"
+        f"[{len(record_function_indexes) + 1}] = {{\n"
+    )
     out.emit("    [0] = NULL,\n")
     for name in record_function_indexes:
         out.emit(f"    [{name}_INDEX] = _PyOpcode_RecordFunction{name[7:]},\n")
     out.emit("};\n")
+    generate_record_transform_dispatcher(record_uop_names, out)
+
+
+def generate_record_transform_dispatcher(
+    record_uop_names: list[str], out: CWriter
+) -> None:
+    """Emit a switch that converts a family-recorded value for a recorder uop.
+
+    Only `_RECORD_*` uops that need conversion get a case; the default
+    returns the input value unchanged. Helpers live in Python/optimizer.c.
+    """
+    cases: dict[str, list[str]] = {}
+    for record_name in record_uop_names:
+        helper = _RECORD_TRANSFORM_HELPERS.get(record_name)
+        if helper is None:
+            continue
+        cases.setdefault(helper, []).append(record_name)
+    out.emit("\n")
+    out.emit(
+        "PyObject *\n"
+        "_PyOpcode_RecordTransformValue(int uop, PyObject *value)\n"
+        "{\n"
+    )
+    out.emit("    switch (uop) {\n")
+    for helper, names in cases.items():
+        for name in names:
+            out.emit(f"        case {name}:\n")
+        out.emit(f"            return {helper}(value);\n")
+    out.emit("        default:\n")
+    out.emit("            return value;\n")
+    out.emit("    }\n")
+    out.emit("}\n")
 
 
 arg_parser = argparse.ArgumentParser(