uint8_t count;
uint8_t indices[MAX_RECORDED_VALUES];
} _PyOpcodeRecordEntry;
+
+/* Per-opcode layout telling the optimizer where each of an instruction's
+ * recorder uops finds its value among the values recorded for the opcode.
+ */
+typedef struct {
+ /* Number of valid entries in `slots`. */
+ uint8_t count;
+ /* Bit i is set when the value read for the i-th recorder must be passed
+ * through _PyOpcode_RecordTransformValue() before it is used. */
+ uint8_t transform_mask;
+ /* slots[i] is the index of the recorded value read by the i-th recorder. */
+ uint8_t slots[MAX_RECORDED_VALUES];
+} _PyOpcodeRecordSlotMap;
+
PyAPI_DATA(const _PyOpcodeRecordEntry) _PyOpcode_RecordEntries[256];
+PyAPI_DATA(const _PyOpcodeRecordSlotMap) _PyOpcode_RecordSlotMaps[256];
+
+/* Convert a family-recorded value to the form a recorder uop expects.
+ * `uop` identifies the recorder uop that will consume the value.
+ * If no transform is needed, return the input value unchanged.
+ * Takes ownership of `value`; returns an owned reference (which may be
+ * `value` itself) or NULL when the transform produces no value.
+ */
+PyAPI_FUNC(PyObject *) _PyOpcode_RecordTransformValue(int uop, PyObject *value);
#endif
#ifdef __cplusplus
self.assertNotIn("_LOAD_SUPER_ATTR_METHOD", uops)
self.assertEqual(uops.count("_GUARD_NOS_TYPE_VERSION"), 2)
+ def test_settrace_then_polymorphic_call_does_not_crash(self):
+ # Run a child interpreter that installs and then removes a trace
+ # function, then loops alternating between str(0) and C(0) calls.
+ # assert_python_ok is expected to fail the test if the child does
+ # not exit cleanly (for example because it crashed).
+ script_helper.assert_python_ok("-c", textwrap.dedent("""
+ import sys
+ sys.settrace(lambda *_: None)
+ sys.settrace(None)
+
+ class C:
+ def __init__(self, x):
+ pass
+
+ for i in 0, 1, 0, 1:
+ C(0) if i else str(0)
+ """))
def global_identity(x):
return x
pass
super().tearDown()
- def generate_tables(self, input: str) -> str:
- import io
+ def analyze_input(self, input: str):
+ """Run the analyzer over `input` and return its result.
+
+ Writes `input` between the parser's BEGIN/END markers into the
+ temporary input file, then returns analyze_files() for that file.
+ """
with open(self.temp_input_filename, "w+") as f:
f.write(parser.BEGIN_MARKER)
f.write(input)
f.write(parser.END_MARKER)
with handle_stderr():
- analysis = analyze_files([self.temp_input_filename])
+ return analyze_files([self.temp_input_filename])
+
+ def generate_tables(self, input: str) -> str:
+ """Return the text emitted by generate_recorder_tables() for `input`."""
+ import io
+ analysis = self.analyze_input(input)
# Capture the generator's output in memory instead of a file.
buf = io.StringIO()
out = CWriter(buf, 0, False)
record_function_generator.generate_recorder_tables(analysis, out)
return buf.getvalue()
+ def get_slot_map_section(self, output: str) -> str:
+ """Return the initializer body of `_PyOpcode_RecordSlotMaps` in `output`.
+
+ The result is the text after the table's opening line and before the
+ first `};` that is followed by a blank line. Raises IndexError if the
+ opening line is not present in `output`.
+ """
+ return output.split(
+ "const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {\n",
+ 1,
+ )[1].split("};\n\n", 1)[0]
+
+ def assert_slot_map_lines(self, output: str, *lines: str) -> None:
+ """Assert that each of `lines` occurs inside the slot-map table of `output`.
+
+ Restricting the search to the slot-map section keeps similar-looking
+ entries of the other generated tables from satisfying the check.
+ """
+ slot_map_section = self.get_slot_map_section(output)
+ for line in lines:
+ self.assertIn(line, slot_map_section)
+
def test_single_recording_uop_generates_count(self):
input = """
tier2 op(_RECORD_TOS, (value -- value)) {
with self.assertRaisesRegex(ValueError, "exceeds MAX_RECORDED_VALUES"):
self.generate_tables(input)
+ def test_family_member_needs_transform_only_when_shape_changes(self):
+ """The transform bit is set only when own and family recorders differ.
+
+ In the OP_RAW family the head uses _RECORD_TOS while its member uses
+ _RECORD_TOS_TYPE, so only the head is flagged. In the OP_TYPED family
+ both use _RECORD_TOS_TYPE, so neither is flagged.
+ """
+ input = """
+ tier2 op(_RECORD_TOS, (value -- value)) {
+ RECORD_VALUE(value);
+ }
+ tier2 op(_RECORD_TOS_TYPE, (value -- value)) {
+ RECORD_VALUE(Py_TYPE(value));
+ }
+ op(_DO_STUFF, (value -- res)) {
+ res = value;
+ }
+ macro(OP_RAW) = _RECORD_TOS + _DO_STUFF;
+ macro(OP_RAW_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+ family(OP_RAW, INLINE_CACHE_ENTRIES_OP_RAW) = { OP_RAW_SPECIALIZED };
+
+ macro(OP_TYPED) = _RECORD_TOS_TYPE + _DO_STUFF;
+ macro(OP_TYPED_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+ family(OP_TYPED, INLINE_CACHE_ENTRIES_OP_TYPED) = { OP_TYPED_SPECIALIZED };
+ """
+ output = self.generate_tables(input)
+ self.assert_slot_map_lines(
+ output,
+ "[OP_RAW] = {1, 1, {0}}",
+ "[OP_RAW_SPECIALIZED] = {1, 0, {0}}",
+ "[OP_TYPED] = {1, 0, {0}}",
+ "[OP_TYPED_SPECIALIZED] = {1, 0, {0}}",
+ )
+
+ def test_family_member_maps_positional_recorders_to_family_slots(self):
+ """Each recorder maps to the family slot of its own kind.
+
+ The head records TOS then NOS while the member records only NOS, so
+ the head's slot list is reordered ({1, 0}) relative to the family
+ layout and the member reads slot 0. No transform bits are set.
+ """
+ input = """
+ tier2 op(_RECORD_TOS, (sub -- sub)) {
+ RECORD_VALUE(sub);
+ }
+ tier2 op(_RECORD_NOS, (container, sub -- container, sub)) {
+ RECORD_VALUE(container);
+ }
+ op(_DO_STUFF, (container, sub -- res)) {
+ res = container;
+ }
+ macro(OP) = _RECORD_TOS + _RECORD_NOS + _DO_STUFF;
+ macro(OP_SPECIALIZED) = _RECORD_NOS + _DO_STUFF;
+ family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+ """
+ output = self.generate_tables(input)
+ self.assert_slot_map_lines(
+ output,
+ "[OP] = {2, 0, {1, 0}}",
+ "[OP_SPECIALIZED] = {1, 0, {0}}",
+ )
+
+ def test_family_member_maps_non_positional_recorders_by_stack_shape(self):
+ """_RECORD_CALLABLE and _RECORD_BOUND_METHOD share a single slot.
+
+ The head uses _RECORD_CALLABLE and the member _RECORD_BOUND_METHOD;
+ both map to slot 0 and only the head is flagged for transformation.
+ """
+ input = """
+ tier2 op(_RECORD_CALLABLE, (callable, self, args[oparg] -- callable, self, args[oparg])) {
+ RECORD_VALUE(callable);
+ }
+ tier2 op(_RECORD_BOUND_METHOD, (callable, self, args[oparg] -- callable, self, args[oparg])) {
+ RECORD_VALUE(callable);
+ }
+ op(_DO_STUFF, (callable, self, args[oparg] -- res)) {
+ res = callable;
+ }
+ macro(OP) = _RECORD_CALLABLE + _DO_STUFF;
+ macro(OP_SPECIALIZED) = _RECORD_BOUND_METHOD + _DO_STUFF;
+ family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+ """
+ output = self.generate_tables(input)
+ self.assert_slot_map_lines(
+ output,
+ "[OP] = {1, 1, {0}}",
+ "[OP_SPECIALIZED] = {1, 0, {0}}",
+ )
+
+ def test_family_head_records_union_of_member_recorders(self):
+ """A head with no recorder of its own still gets the member's recorder.
+
+ Both OP and OP_SPECIALIZED must list _RECORD_TOS in the record-entry
+ table, and the member reads slot 0 without a transform.
+ """
+ input = """
+ tier2 op(_RECORD_TOS, (value -- value)) {
+ RECORD_VALUE(value);
+ }
+ op(_DO_STUFF, (value -- res)) {
+ res = value;
+ }
+ macro(OP) = _DO_STUFF;
+ macro(OP_SPECIALIZED) = _RECORD_TOS + _DO_STUFF;
+ family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+ """
+ output = self.generate_tables(input)
+ self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
+ self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
+ self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {0}}")
+
+ def test_family_detects_base_and_specialized_recording_difference(self):
+ """Own recorder names stay distinct while the family entry is shared.
+
+ get_instruction_record_names() must report _RECORD_TOS for the head
+ and _RECORD_TOS_TYPE for the member. Both opcodes get the same
+ record entry (_RECORD_TOS_TYPE), and only the head is flagged for
+ transformation in the slot map.
+ """
+ input = """
+ tier2 op(_RECORD_TOS, (value -- value)) {
+ RECORD_VALUE(value);
+ }
+ tier2 op(_RECORD_TOS_TYPE, (value -- value)) {
+ RECORD_VALUE(Py_TYPE(value));
+ }
+ op(_DO_STUFF, (value -- res)) {
+ res = value;
+ }
+ macro(OP) = _RECORD_TOS + _DO_STUFF;
+ macro(OP_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+ family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+ """
+ analysis = self.analyze_input(input)
+ output = self.generate_tables(input)
+ self.assertEqual(
+ record_function_generator.get_instruction_record_names(
+ analysis.instructions["OP"]
+ ),
+ ["_RECORD_TOS"],
+ )
+ self.assertEqual(
+ record_function_generator.get_instruction_record_names(
+ analysis.instructions["OP_SPECIALIZED"]
+ ),
+ ["_RECORD_TOS_TYPE"],
+ )
+ self.assertIn("[OP] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
+ self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
+ self.assert_slot_map_lines(
+ output,
+ "[OP] = {1, 1, {0}}",
+ "[OP_SPECIALIZED] = {1, 0, {0}}",
+ )
+
+ def test_family_head_falls_back_for_missing_member_slots(self):
+ """A slot no member records is supplied by the family head.
+
+ Only the head uses _RECORD_TOS, yet both the head and the
+ recorder-less member must list it in the record-entry table.
+ """
+ input = """
+ tier2 op(_RECORD_TOS, (value -- value)) {
+ RECORD_VALUE(value);
+ }
+ op(_DO_STUFF, (value -- res)) {
+ res = value;
+ }
+ macro(OP) = _RECORD_TOS + _DO_STUFF;
+ macro(OP_SPECIALIZED) = _DO_STUFF;
+ family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+ """
+ output = self.generate_tables(input)
+ self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
+ self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
+
+ def test_family_mixed_slots_only_transform_changed_recorders(self):
+ """Only the recorder whose form differs from its family slot is flagged.
+
+ The family layout is (_RECORD_NOS, _RECORD_TOS_TYPE). The head's
+ _RECORD_TOS_TYPE matches its slot exactly, while its _RECORD_NOS_TYPE
+ reads the _RECORD_NOS slot, so only bit 1 of the head's mask is set.
+ """
+ input = """
+ tier2 op(_RECORD_TOS_TYPE, (left, right -- left, right)) {
+ RECORD_VALUE(Py_TYPE(right));
+ }
+ tier2 op(_RECORD_NOS_TYPE, (left, right -- left, right)) {
+ RECORD_VALUE(Py_TYPE(left));
+ }
+ tier2 op(_RECORD_NOS, (left, right -- left, right)) {
+ RECORD_VALUE(left);
+ }
+ op(_DO_STUFF, (left, right -- res)) {
+ res = left;
+ }
+ macro(OP) = _RECORD_TOS_TYPE + _RECORD_NOS_TYPE + _DO_STUFF;
+ macro(OP_SPECIALIZED) = _RECORD_NOS + _DO_STUFF;
+ family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+ """
+ output = self.generate_tables(input)
+ self.assertIn("[OP] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}}", output)
+ self.assert_slot_map_lines(
+ output,
+ "[OP] = {2, 2, {1, 0}}",
+ "[OP_SPECIALIZED] = {1, 0, {0}}",
+ )
class TestGeneratedAbstractCases(unittest.TestCase):
def setUp(self) -> None:
--- /dev/null
+Fix a crash in the JIT optimizer when specialized opcode families inherited incompatible recorded operand layouts.
);
}
+/* Replace a recorded object with its type.
+ * Consumes the reference to `value` and returns a new strong reference
+ * to Py_TYPE(value).
+ */
+static PyObject *
+record_trace_transform_to_type(PyObject *value)
+{
+ /* Take the new reference before releasing `value`. */
+ PyObject *tp = Py_NewRef((PyObject *)Py_TYPE(value));
+ Py_DECREF(value);
+ return tp;
+}
+
+/* _RECORD_NOS_GEN_FUNC and _RECORD_3OS_GEN_FUNC record the raw receiver.
+ * If it is a generator, return its function object; otherwise return NULL.
+ * NULL is also returned when the generator frame's function slot is null.
+ * The reference to `value` is consumed in every case.
+ */
+static PyObject *
+record_trace_transform_gen_func(PyObject *value)
+{
+ PyObject *func = NULL;
+ if (PyGen_Check(value)) {
+ _PyStackRef f = ((PyGenObject *)value)->gi_iframe.f_funcobj;
+ if (!PyStackRef_IsNull(f)) {
+ /* `f` is only borrowed, so take a new strong reference. */
+ func = Py_NewRef(PyStackRef_AsPyObjectBorrow(f));
+ }
+ }
+ Py_DECREF(value);
+ return func;
+}
+
+/* _RECORD_BOUND_METHOD records the raw callable.
+ * Keep it only for bound methods; otherwise return NULL.
+ * Takes ownership of `value`: it is either handed back to the caller
+ * as-is or released here.
+ */
+static PyObject *
+record_trace_transform_bound_method(PyObject *value)
+{
+ /* Exact type match only; the caller's reference is passed through. */
+ if (Py_TYPE(value) == &PyMethod_Type) {
+ return value;
+ }
+ Py_DECREF(value);
+ return NULL;
+}
+
/* Returns 1 on success (added to trace), 0 on trace end.
*/
// gh-142543: inlining this function causes stack overflows
// One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
trace->end -= 2;
+ const _PyOpcodeRecordSlotMap *record_slot_map = &_PyOpcode_RecordSlotMaps[opcode];
+
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
assert(!_PyErr_Occurred(tstate));
}
}
else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) {
- PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx];
- tracer->prev_state.recorded_values[record_idx] = NULL;
+ assert(record_idx < record_slot_map->count);
+ uint8_t record_slot = record_slot_map->slots[record_idx];
+ assert(record_slot < tracer->prev_state.recorded_count);
+ PyObject *recorded_value = tracer->prev_state.recorded_values[record_slot];
+ tracer->prev_state.recorded_values[record_slot] = NULL;
+ if ((record_slot_map->transform_mask & (1u << record_idx)) &&
+ recorded_value != NULL) {
+ recorded_value = _PyOpcode_RecordTransformValue(uop, recorded_value);
+ }
record_idx++;
operand = (uintptr_t)recorded_value;
}
#define _RECORD_3OS_GEN_FUNC_INDEX 3
#define _RECORD_NOS_GEN_FUNC_INDEX 4
#define _RECORD_CALLABLE_INDEX 5
-#define _RECORD_BOUND_METHOD_INDEX 6
-#define _RECORD_CALLABLE_KW_INDEX 7
-#define _RECORD_4OS_INDEX 8
-#define _RECORD_NOS_TYPE_INDEX 9
+#define _RECORD_CALLABLE_KW_INDEX 6
+#define _RECORD_4OS_INDEX 7
const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
+ [TO_BOOL_BOOL] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [TO_BOOL_NONE] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [LOAD_SUPER_ATTR_ATTR] = {1, {_RECORD_NOS_INDEX}},
+ [TO_BOOL] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [TO_BOOL_INT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [TO_BOOL_LIST] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [TO_BOOL_STR] = {1, {_RECORD_TOS_TYPE_INDEX}},
[TO_BOOL_ALWAYS_TRUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
- [BINARY_OP_SUBSCR_GETITEM] = {1, {_RECORD_NOS_INDEX}},
+ [BINARY_OP_MULTIPLY_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_ADD_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_SUBTRACT_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_MULTIPLY_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_ADD_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_SUBTRACT_FLOAT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_ADD_UNICODE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_EXTEND] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_INPLACE_ADD_UNICODE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_SUBSCR_LIST_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_SUBSCR_LIST_SLICE] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_SUBSCR_STR_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_SUBSCR_USTR_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_SUBSCR_TUPLE_INT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_SUBSCR_DICT] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [BINARY_OP_SUBSCR_GETITEM] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
+ [SEND] = {1, {_RECORD_3OS_GEN_FUNC_INDEX}},
[SEND_GEN] = {1, {_RECORD_3OS_GEN_FUNC_INDEX}},
+ [STORE_ATTR] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [LOAD_SUPER_ATTR] = {1, {_RECORD_NOS_INDEX}},
[LOAD_SUPER_ATTR_METHOD] = {1, {_RECORD_NOS_INDEX}},
+ [LOAD_ATTR] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [LOAD_ATTR_MODULE] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [LOAD_ATTR_CLASS] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_PROPERTY] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = {1, {_RECORD_TOS_TYPE_INDEX}},
[GET_ITER] = {1, {_RECORD_TOS_TYPE_INDEX}},
[GET_ITER_SELF] = {1, {_RECORD_TOS_TYPE_INDEX}},
[GET_ITER_VIRTUAL] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [FOR_ITER] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
+ [FOR_ITER_VIRTUAL] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
+ [FOR_ITER_LIST] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
+ [FOR_ITER_TUPLE] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
+ [FOR_ITER_RANGE] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
[FOR_ITER_GEN] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
[LOAD_SPECIAL] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_METHOD_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_METHOD_LAZY_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}},
+ [CALL] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
- [CALL_BOUND_METHOD_GENERAL] = {1, {_RECORD_BOUND_METHOD_INDEX}},
+ [CALL_BOUND_METHOD_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_NON_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}},
- [CALL_BOUND_METHOD_EXACT_ARGS] = {1, {_RECORD_BOUND_METHOD_INDEX}},
+ [CALL_BOUND_METHOD_EXACT_ARGS] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_PY_EXACT_ARGS] = {1, {_RECORD_CALLABLE_INDEX}},
+ [CALL_TYPE_1] = {1, {_RECORD_CALLABLE_INDEX}},
+ [CALL_STR_1] = {1, {_RECORD_CALLABLE_INDEX}},
+ [CALL_TUPLE_1] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_ALLOC_AND_ENTER_INIT] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_BUILTIN_CLASS] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_BUILTIN_O] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_BUILTIN_FAST] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_BUILTIN_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}},
+ [CALL_LEN] = {1, {_RECORD_CALLABLE_INDEX}},
+ [CALL_ISINSTANCE] = {1, {_RECORD_CALLABLE_INDEX}},
+ [CALL_LIST_APPEND] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_METHOD_DESCRIPTOR_O] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_METHOD_DESCRIPTOR_NOARGS] = {1, {_RECORD_CALLABLE_INDEX}},
+ [CALL_METHOD_DESCRIPTOR_FAST] = {1, {_RECORD_CALLABLE_INDEX}},
[CALL_KW_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}},
[CALL_KW_BOUND_METHOD] = {1, {_RECORD_CALLABLE_KW_INDEX}},
+ [CALL_KW] = {1, {_RECORD_CALLABLE_KW_INDEX}},
+ [CALL_KW_NON_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}},
+ [CALL_FUNCTION_EX] = {1, {_RECORD_4OS_INDEX}},
[CALL_EX_PY] = {1, {_RECORD_4OS_INDEX}},
- [BINARY_OP] = {2, {_RECORD_TOS_TYPE_INDEX, _RECORD_NOS_TYPE_INDEX}},
+ [CALL_EX_NON_PY_GENERAL] = {1, {_RECORD_4OS_INDEX}},
+ [BINARY_OP] = {2, {_RECORD_NOS_INDEX, _RECORD_TOS_TYPE_INDEX}},
};
-const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[10] = {
+const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {
+ [TO_BOOL_ALWAYS_TRUE] = {1, 0, {0}},
+ [BINARY_OP_SUBSCR_GETITEM] = {1, 0, {0}},
+ [SEND_GEN] = {1, 0, {0}},
+ [LOAD_SUPER_ATTR_METHOD] = {1, 0, {0}},
+ [LOAD_ATTR_INSTANCE_VALUE] = {1, 0, {0}},
+ [LOAD_ATTR_WITH_HINT] = {1, 0, {0}},
+ [LOAD_ATTR_SLOT] = {1, 0, {0}},
+ [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, 0, {0}},
+ [LOAD_ATTR_PROPERTY] = {1, 0, {0}},
+ [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = {1, 0, {0}},
+ [STORE_ATTR_INSTANCE_VALUE] = {1, 0, {0}},
+ [STORE_ATTR_WITH_HINT] = {1, 0, {0}},
+ [STORE_ATTR_SLOT] = {1, 0, {0}},
+ [GET_ITER] = {1, 0, {0}},
+ [GET_ITER_SELF] = {1, 0, {0}},
+ [GET_ITER_VIRTUAL] = {1, 0, {0}},
+ [FOR_ITER_GEN] = {1, 0, {0}},
+ [LOAD_SPECIAL] = {1, 0, {0}},
+ [LOAD_ATTR_METHOD_WITH_VALUES] = {1, 0, {0}},
+ [LOAD_ATTR_METHOD_NO_DICT] = {1, 0, {0}},
+ [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = {1, 0, {0}},
+ [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = {1, 0, {0}},
+ [LOAD_ATTR_METHOD_LAZY_DICT] = {1, 0, {0}},
+ [CALL_PY_GENERAL] = {1, 0, {0}},
+ [CALL_BOUND_METHOD_GENERAL] = {1, 1, {0}},
+ [CALL_NON_PY_GENERAL] = {1, 0, {0}},
+ [CALL_BOUND_METHOD_EXACT_ARGS] = {1, 1, {0}},
+ [CALL_PY_EXACT_ARGS] = {1, 0, {0}},
+ [CALL_ALLOC_AND_ENTER_INIT] = {1, 0, {0}},
+ [CALL_BUILTIN_CLASS] = {1, 0, {0}},
+ [CALL_BUILTIN_O] = {1, 0, {0}},
+ [CALL_BUILTIN_FAST] = {1, 0, {0}},
+ [CALL_BUILTIN_FAST_WITH_KEYWORDS] = {1, 0, {0}},
+ [CALL_METHOD_DESCRIPTOR_O] = {1, 0, {0}},
+ [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = {1, 0, {0}},
+ [CALL_METHOD_DESCRIPTOR_NOARGS] = {1, 0, {0}},
+ [CALL_KW_PY] = {1, 0, {0}},
+ [CALL_KW_BOUND_METHOD] = {1, 0, {0}},
+ [CALL_EX_PY] = {1, 0, {0}},
+ [BINARY_OP] = {2, 2, {1, 0}},
+};
+
+const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[8] = {
[0] = NULL,
[_RECORD_TOS_TYPE_INDEX] = _PyOpcode_RecordFunction_TOS_TYPE,
[_RECORD_NOS_INDEX] = _PyOpcode_RecordFunction_NOS,
[_RECORD_3OS_GEN_FUNC_INDEX] = _PyOpcode_RecordFunction_3OS_GEN_FUNC,
[_RECORD_NOS_GEN_FUNC_INDEX] = _PyOpcode_RecordFunction_NOS_GEN_FUNC,
[_RECORD_CALLABLE_INDEX] = _PyOpcode_RecordFunction_CALLABLE,
- [_RECORD_BOUND_METHOD_INDEX] = _PyOpcode_RecordFunction_BOUND_METHOD,
[_RECORD_CALLABLE_KW_INDEX] = _PyOpcode_RecordFunction_CALLABLE_KW,
[_RECORD_4OS_INDEX] = _PyOpcode_RecordFunction_4OS,
- [_RECORD_NOS_TYPE_INDEX] = _PyOpcode_RecordFunction_NOS_TYPE,
};
+
+PyObject *
+_PyOpcode_RecordTransformValue(int uop, PyObject *value)
+{
+ switch (uop) {
+ case _RECORD_TOS_TYPE:
+ case _RECORD_NOS_TYPE:
+ return record_trace_transform_to_type(value);
+ case _RECORD_NOS_GEN_FUNC:
+ case _RECORD_3OS_GEN_FUNC:
+ return record_trace_transform_gen_func(value);
+ case _RECORD_BOUND_METHOD:
+ return record_trace_transform_bound_method(value);
+ default:
+ return value;
+ }
+}
# Must match MAX_RECORDED_VALUES in Include/internal/pycore_optimizer.h.
MAX_RECORDED_VALUES = 3
+# Map `_RECORD_*` uops to the helper that converts a raw family-recorded
+# value to the form the specialized member consumes.
+# Recorder uops not listed here fall through to the generated dispatcher's
+# `default` case, which returns the value unchanged.
+_RECORD_TRANSFORM_HELPERS: dict[str, str] = {
+ "_RECORD_TOS_TYPE": "record_trace_transform_to_type",
+ "_RECORD_NOS_TYPE": "record_trace_transform_to_type",
+ "_RECORD_NOS_GEN_FUNC": "record_trace_transform_gen_func",
+ "_RECORD_3OS_GEN_FUNC": "record_trace_transform_gen_func",
+ "_RECORD_BOUND_METHOD": "record_trace_transform_bound_method",
+}
+
+# Recorder uops whose slot kind differs from the leading word of their name.
+# Consulted first by get_record_slot_kind(); this entry gives
+# _RECORD_BOUND_METHOD the same slot kind as _RECORD_CALLABLE.
+_RECORD_SLOT_KIND_OVERRIDES: dict[str, str] = {
+ "_RECORD_BOUND_METHOD": "CALLABLE",
+}
+
class RecorderEmitter(Emitter):
def __init__(self, out: CWriter):
return True
+def get_record_slot_kind(record_name: str) -> str:
+ """Return the slot kind that groups recorder uops reading the same value.
+
+ An entry in _RECORD_SLOT_KIND_OVERRIDES wins. Otherwise a name without
+ the `_RECORD_` prefix is returned unchanged, and a prefixed name yields
+ the first underscore-delimited word after the prefix
+ (e.g. `_RECORD_TOS_TYPE` -> `TOS`).
+ """
+ if record_name in _RECORD_SLOT_KIND_OVERRIDES:
+ return _RECORD_SLOT_KIND_OVERRIDES[record_name]
+ if not record_name.startswith("_RECORD_"):
+ return record_name
+ return record_name.removeprefix("_RECORD_").partition("_")[0]
+
+
+def get_instruction_record_names(inst: Instruction) -> list[str]:
+ """Return, in part order, the names of `inst`'s parts that record a value."""
+ return [part.name for part in inst.parts if part.properties.records_value]
+
+
+def get_family_record_names(
+ family_head: Instruction,
+ family_members: list[Instruction],
+ instruction_records: dict[str, list[str]],
+ record_slot_keys: dict[str, str],
+) -> list[str]:
+ """Compute the recorder list shared by every opcode of a family.
+
+ Member recorders are visited first, in member order; each slot kind
+ gets one entry. The family head then contributes a recorder only for
+ slot kinds that no member uses.
+
+ `instruction_records` maps instruction name to its own recorder names;
+ `record_slot_keys` maps recorder name to its slot kind.
+
+ Raises ValueError when two member recorders of the same slot kind
+ resolve to different family recorders.
+ """
+ member_records = [instruction_records[m.name] for m in family_members]
+ # Only member recorders are considered here; the head's own recorders
+ # do not influence the raw-recorder preference below.
+ all_member_names = {n for names in member_records for n in names}
+ records: list[str] = []
+ # Slot kind -> position of that kind's recorder in `records`.
+ slot_index: dict[str, int] = {}
+
+ def add(name: str) -> None:
+ kind = record_slot_keys[name]
+ # Prefer the raw recorder if any member uses it; otherwise the given form.
+ raw = f"_RECORD_{kind}"
+ source = raw if raw in all_member_names else name
+ existing = slot_index.get(kind)
+ if existing is None:
+ slot_index[kind] = len(records)
+ records.append(source)
+ elif records[existing] != source:
+ raise ValueError(
+ f"Family {family_head.name} has incompatible recorders for "
+ f"slot {kind}: {records[existing]} and {source}"
+ )
+
+ for names in member_records:
+ for name in names:
+ add(name)
+ # Family head supplies any slots no member exercises.
+ for name in instruction_records[family_head.name]:
+ if record_slot_keys[name] not in slot_index:
+ slot_index[record_slot_keys[name]] = len(records)
+ records.append(name)
+ return records
+
+
+def get_record_consumer_layout(
+ inst_name: str,
+ source_records: list[str],
+ own_records: list[str],
+ record_slot_keys: dict[str, str],
+) -> tuple[list[int], int]:
+ """Map an instruction's own recorders onto the recorded family slots.
+
+ For each name in `own_records`, in order, the first not-yet-used entry
+ of `source_records` with the same slot kind is chosen.
+
+ Returns `(slot_map, transform_mask)`: `slot_map[i]` is the index into
+ `source_records` for own recorder `i`, and bit `i` of `transform_mask`
+ is set when the chosen source recorder differs from the own recorder.
+
+ Raises ValueError (mentioning `inst_name`) when no unused slot of the
+ required kind exists.
+ """
+ # Each source slot may be consumed by at most one own recorder.
+ used = [False] * len(source_records)
+ slot_map: list[int] = []
+ transform_mask = 0
+ for i, own in enumerate(own_records):
+ own_kind = record_slot_keys[own]
+ for j, src in enumerate(source_records):
+ if not used[j] and record_slot_keys[src] == own_kind:
+ used[j] = True
+ slot_map.append(j)
+ if src != own:
+ transform_mask |= 1 << i
+ break
+ else:
+ # Presumably the else of the inner search loop (indentation is not
+ # visible in this view): reached when no slot matched.
+ raise ValueError(
+ f"Instruction {inst_name} has no compatible family slot for "
+ f"{own} in {source_records}"
+ )
+ return slot_map, transform_mask
+
def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: CWriter) -> None:
- write_header(__file__, filenames, outfile)
- outfile.write(
+ write_header(__file__, filenames, out.out)
+ out.out.write(
"""
#ifdef TIER_ONE
#error "This file is for Tier 2 only"
)
args = "_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, int oparg, PyObject **recorded_value"
emitter = RecorderEmitter(out)
- func_count = 0
nop = analysis.instructions["NOP"]
- function_table: dict[str, int] = dict()
- for name, uop in analysis.uops.items():
+ for uop in analysis.uops.values():
if not uop.properties.records_value:
continue
- func_count += 1
out.emit(f"void _PyOpcode_RecordFunction{uop.name[7:]}({args}) {{\n")
seen = {"unused"}
for var in uop.stack.inputs:
out.emit("\n\n")
def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None:
+ """Emit the recorder tables for all instructions in `analysis`.
+
+ Written in order: the `<recorder>_INDEX` defines,
+ `_PyOpcode_RecordEntries` (recorders per opcode, using the family-wide
+ list for instructions that belong to a family),
+ `_PyOpcode_RecordSlotMaps` (slot/transform layout for instructions that
+ have recorders of their own), `_PyOpcode_RecordFunctions`, and the
+ `_PyOpcode_RecordTransformValue` dispatcher.
+
+ Raises ValueError when an instruction would record more than
+ MAX_RECORDED_VALUES values; the family helpers called here may also
+ raise ValueError.
+ """
- record_function_indexes: dict[str, int] = dict()
+ instruction_records = {
+ inst.name: get_instruction_record_names(inst)
+ for inst in analysis.instructions.values()
+ }
+ record_uop_names = [
+ name for name, uop in analysis.uops.items() if uop.properties.records_value
+ ]
+ record_slot_keys = {name: get_record_slot_kind(name) for name in record_uop_names}
+ family_record_table = {
+ family.name: get_family_record_names(
+ analysis.instructions[family.name],
+ family.members,
+ instruction_records,
+ record_slot_keys,
+ )
+ for family in analysis.families.values()
+ }
+
record_table: dict[str, list[str]] = {}
- index = 1
+ record_consumer_table: dict[str, tuple[list[int], int]] = {}
+ record_function_indexes: dict[str, int] = {}
for inst in analysis.instructions.values():
- if not inst.properties.records_value:
+ own_records = instruction_records[inst.name]
+ # TRACE_RECORD runs before execution, but specialization may rewrite
+ # the opcode before translation. Record the raw family shape (union
+ # of head + members) so any opcode in the family can be translated
+ # from the same recorded layout.
+ # The second lookup covers an instruction that is itself a family name.
+ family = inst.family or analysis.families.get(inst.name)
+ records = family_record_table[family.name] if family is not None else own_records
+ if not records:
continue
- records: list[str] = []
- for part in inst.parts:
- if not part.properties.records_value:
- continue
- if part.name not in record_function_indexes:
- record_function_indexes[part.name] = index
- index += 1
- records.append(part.name)
- if records:
- if len(records) > MAX_RECORDED_VALUES:
- raise ValueError(
- f"Instruction {inst.name} has {len(records)} recording ops, "
- f"exceeds MAX_RECORDED_VALUES ({MAX_RECORDED_VALUES})"
- )
- record_table[inst.name] = records
- func_count = len(record_function_indexes)
+ if len(records) > MAX_RECORDED_VALUES:
+ raise ValueError(
+ f"Instruction {inst.name} has {len(records)} recording ops, "
+ f"exceeds MAX_RECORDED_VALUES ({MAX_RECORDED_VALUES})"
+ )
+ record_table[inst.name] = records
+ # Indexes start at 1; index 0 is emitted as the NULL entry below.
+ for name in records:
+ if name not in record_function_indexes:
+ record_function_indexes[name] = len(record_function_indexes) + 1
+ if own_records:
+ record_consumer_table[inst.name] = get_record_consumer_layout(
+ inst.name, records, own_records, record_slot_keys
+ )
for name, index in record_function_indexes.items():
out.emit(f"#define {name}_INDEX {index}\n")
out.emit("\n")
+
out.emit("const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {\n")
- for inst_name, record_names in record_table.items():
- indices = ", ".join(f"{name}_INDEX" for name in record_names)
- out.emit(f" [{inst_name}] = {{{len(record_names)}, {{{indices}}}}},\n")
+ for inst_name, records in record_table.items():
+ indices = ", ".join(f"{name}_INDEX" for name in records)
+ out.emit(f" [{inst_name}] = {{{len(records)}, {{{indices}}}}},\n")
+ out.emit("};\n\n")
+
+ out.emit("const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {\n")
+ for inst_name, (slots, mask) in record_consumer_table.items():
+ slot_list = ", ".join(str(s) for s in slots)
+ out.emit(
+ f" [{inst_name}] = {{{len(slots)}, {mask}, {{{slot_list}}}}},\n"
+ )
out.emit("};\n\n")
- out.emit(f"const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[{func_count+1}] = {{\n")
+
+ out.emit(
+ f"const _Py_RecordFuncPtr _PyOpcode_RecordFunctions"
+ f"[{len(record_function_indexes) + 1}] = {{\n"
+ )
out.emit(" [0] = NULL,\n")
for name in record_function_indexes:
out.emit(f" [{name}_INDEX] = _PyOpcode_RecordFunction{name[7:]},\n")
out.emit("};\n")
+ generate_record_transform_dispatcher(record_uop_names, out)
+
+
+def generate_record_transform_dispatcher(
+ record_uop_names: list[str], out: CWriter
+) -> None:
+ """Emit a switch that converts a family-recorded value for a recorder uop.
+
+ Only `_RECORD_*` uops that need conversion get a case; the default
+ returns the input value unchanged. Helpers live in Python/optimizer.c.
+
+ `record_uop_names` lists the recorder uops to consider; those without
+ an entry in _RECORD_TRANSFORM_HELPERS are skipped.
+ """
+ # Helper name -> recorder uops sharing it, so their case labels are
+ # emitted together ahead of a single return statement.
+ cases: dict[str, list[str]] = {}
+ for record_name in record_uop_names:
+ helper = _RECORD_TRANSFORM_HELPERS.get(record_name)
+ if helper is None:
+ continue
+ cases.setdefault(helper, []).append(record_name)
+ out.emit("\n")
+ out.emit(
+ "PyObject *\n"
+ "_PyOpcode_RecordTransformValue(int uop, PyObject *value)\n"
+ "{\n"
+ )
+ out.emit(" switch (uop) {\n")
+ for helper, names in cases.items():
+ for name in names:
+ out.emit(f" case {name}:\n")
+ out.emit(f" return {helper}(value);\n")
+ out.emit(" default:\n")
+ out.emit(" return value;\n")
+ out.emit(" }\n")
+ out.emit("}\n")
arg_parser = argparse.ArgumentParser(