"[OP_TYPED_SPECIALIZED] = {1, 0, {0}}",
)
+ def test_record_transform_generated_from_recording_uop(self):
+ # OP records the raw TOS while OP_SPECIALIZED records Py_TYPE(TOS), so
+ # the two family instructions use different recorders for the same value.
+ input = """
+ tier2 op(_RECORD_TOS, (tos -- tos)) {
+ RECORD_VALUE(PyStackRef_AsPyObjectBorrow(tos));
+ }
+ tier2 op(_RECORD_TOS_TYPE, (tos -- tos)) {
+ RECORD_VALUE(Py_TYPE(PyStackRef_AsPyObjectBorrow(tos)));
+ }
+ op(_DO_STUFF, (tos -- res)) {
+ res = tos;
+ }
+ macro(OP) = _RECORD_TOS + _DO_STUFF;
+ macro(OP_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+ family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+ """
+ output = self.generate_tables(input)
+ # A transform function named after the specialization's recorder exists.
+ self.assertIn("_PyOpcode_RecordTransform_TOS_TYPE", output)
+ # Its body rebinds the recorded value to the uop's input name (`tos`)
+ # and then evaluates the RECORD_VALUE expression from the uop body.
+ self.assertIn("tos = PyStackRef_FromPyObjectBorrow(recorded_value);", output)
+ self.assertIn(
+ "transformed_value = (PyObject *)Py_TYPE(PyStackRef_AsPyObjectBorrow(tos));",
+ output,
+ )
+ # The dispatcher forwards to the generated function ...
+ self.assertIn("return _PyOpcode_RecordTransform_TOS_TYPE(value);", output)
+ # ... and the previously referenced helper name no longer appears.
+ self.assertNotIn("record_trace_transform_to_type", output)
+
+ def test_record_transform_generated_when_only_specialization_records(self):
+ # Only OP_SPECIALIZED has a recorder uop here; the base OP macro is just
+ # _DO_STUFF and records nothing on its own.
+ input = """
+ tier2 op(_RECORD_TOS_TYPE, (tos -- tos)) {
+ RECORD_VALUE(Py_TYPE(PyStackRef_AsPyObjectBorrow(tos)));
+ }
+ op(_DO_STUFF, (tos -- res)) {
+ res = tos;
+ }
+ macro(OP) = _DO_STUFF;
+ macro(OP_SPECIALIZED) = _RECORD_TOS_TYPE + _DO_STUFF;
+ family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+ """
+ output = self.generate_tables(input)
+ # Family head must adopt the specialization's recorder.
+ self.assertIn("[OP] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
+ self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_TYPE_INDEX}}", output)
+ # Specialization consumes the slot directly (mask 0), no transform.
+ self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {0}}")
+ # The trailing "(" matches a definition or call of the transform function.
+ self.assertNotIn("_PyOpcode_RecordTransform_TOS_TYPE(", output)
+
+ def test_no_record_transform_when_only_base_records(self):
+ # Only the base OP macro has a recorder uop here; OP_SPECIALIZED is just
+ # _DO_STUFF and does not use a recorded value.
+ input = """
+ tier2 op(_RECORD_TOS, (tos -- tos)) {
+ RECORD_VALUE(PyStackRef_AsPyObjectBorrow(tos));
+ }
+ op(_DO_STUFF, (tos -- res)) {
+ res = tos;
+ }
+ macro(OP) = _RECORD_TOS + _DO_STUFF;
+ macro(OP_SPECIALIZED) = _DO_STUFF;
+ family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
+ """
+ output = self.generate_tables(input)
+ # Family head records via _RECORD_TOS.
+ self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
+ self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
+ # Specialization has no consumer slot map entry (it doesn't read it).
+ self.assertNotIn(
+ "[OP_SPECIALIZED] = {1,", self.get_slot_map_section(output)
+ )
+ # No transform helpers are generated.
+ self.assertNotIn("_PyOpcode_RecordTransform_TOS(", output)
+ self.assertNotIn("_PyOpcode_RecordTransform_TOS_TYPE", output)
+
def test_family_member_maps_positional_recorders_to_family_slots(self):
input = """
tier2 op(_RECORD_TOS, (sub -- sub)) {
macro(OP_SPECIALIZED) = _RECORD_TOS + _DO_STUFF;
family(OP, INLINE_CACHE_ENTRIES_OP) = { OP_SPECIALIZED };
"""
+ analysis = self.analyze_input(input)
output = self.generate_tables(input)
+ self.assertEqual(
+ analysis.families["OP"].get_member_record_names(),
+ ("_RECORD_TOS",),
+ )
self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)
self.assertIn("[OP_SPECIALIZED] = {1, {_RECORD_TOS_INDEX}}", output)
self.assert_slot_map_lines(output, "[OP_SPECIALIZED] = {1, 0, {0}}")
[_RECORD_4OS_INDEX] = _PyOpcode_RecordFunction_4OS,
};
+static PyObject *
+_PyOpcode_RecordTransform_NOS_TYPE(PyObject *recorded_value)
+{
+ PyObject *transformed_value = NULL;
+ _PyStackRef nos;
+ nos = PyStackRef_FromPyObjectBorrow(recorded_value);
+ transformed_value = (PyObject *)Py_TYPE(PyStackRef_AsPyObjectBorrow(nos));
+ Py_XINCREF(transformed_value);
+ Py_DECREF(recorded_value);
+ return transformed_value;
+}
+
+static PyObject *
+_PyOpcode_RecordTransform_TOS_TYPE(PyObject *recorded_value)
+{
+ PyObject *transformed_value = NULL;
+ _PyStackRef tos;
+ tos = PyStackRef_FromPyObjectBorrow(recorded_value);
+ transformed_value = (PyObject *)Py_TYPE(PyStackRef_AsPyObjectBorrow(tos));
+ Py_XINCREF(transformed_value);
+ Py_DECREF(recorded_value);
+ return transformed_value;
+}
+
+static PyObject *
+_PyOpcode_RecordTransform_NOS_GEN_FUNC(PyObject *recorded_value)
+{
+ PyObject *transformed_value = NULL;
+ _PyStackRef nos;
+ nos = PyStackRef_FromPyObjectBorrow(recorded_value);
+ PyObject *obj = PyStackRef_AsPyObjectBorrow(nos);
+ if (PyGen_Check(obj)) {
+ PyGenObject *gen = (PyGenObject *)obj;
+ _PyStackRef func = gen->gi_iframe.f_funcobj;
+ if (!PyStackRef_IsNull(func)) {
+ transformed_value = (PyObject *)PyStackRef_AsPyObjectBorrow(func);
+ Py_XINCREF(transformed_value);
+ }
+ }
+ Py_DECREF(recorded_value);
+ return transformed_value;
+}
+
+static PyObject *
+_PyOpcode_RecordTransform_BOUND_METHOD(PyObject *recorded_value)
+{
+ PyObject *transformed_value = NULL;
+ _PyStackRef callable;
+ callable = PyStackRef_FromPyObjectBorrow(recorded_value);
+ PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable);
+ if (Py_TYPE(callable_o) == &PyMethod_Type) {
+ transformed_value = (PyObject *)callable_o;
+ Py_XINCREF(transformed_value);
+ }
+ Py_DECREF(recorded_value);
+ return transformed_value;
+}
+
PyObject *
_PyOpcode_RecordTransformValue(int uop, PyObject *value)
{
- switch (uop) {
- case _RECORD_TOS_TYPE:
- case _RECORD_NOS_TYPE:
- return record_trace_transform_to_type(value);
- case _RECORD_NOS_GEN_FUNC:
- case _RECORD_3OS_GEN_FUNC:
- return record_trace_transform_gen_func(value);
- case _RECORD_BOUND_METHOD:
- return record_trace_transform_bound_method(value);
- default:
- return value;
- }
+ switch (uop) {
+ case _RECORD_NOS_TYPE:
+ return _PyOpcode_RecordTransform_NOS_TYPE(value);
+ case _RECORD_TOS_TYPE:
+ return _PyOpcode_RecordTransform_TOS_TYPE(value);
+ case _RECORD_NOS_GEN_FUNC:
+ return _PyOpcode_RecordTransform_NOS_GEN_FUNC(value);
+ case _RECORD_BOUND_METHOD:
+ return _PyOpcode_RecordTransform_BOUND_METHOD(value);
+ default:
+ return value;
+ }
}
from analyzer import (
Analysis,
+ Family,
Instruction,
+ Uop,
analyze_files,
CodeSection,
)
from cwriter import CWriter
from tier1_generator import write_uop, Emitter, declare_variable
-from typing import TextIO
from lexer import Token
from stack import Stack, Storage
# Must match MAX_RECORDED_VALUES in Include/internal/pycore_optimizer.h.
MAX_RECORDED_VALUES = 3
-# Map `_RECORD_*` uops to the helper that converts a raw family-recorded
-# value to the form the specialized member consumes.
-_RECORD_TRANSFORM_HELPERS: dict[str, str] = {
- "_RECORD_TOS_TYPE": "record_trace_transform_to_type",
- "_RECORD_NOS_TYPE": "record_trace_transform_to_type",
- "_RECORD_NOS_GEN_FUNC": "record_trace_transform_gen_func",
- "_RECORD_3OS_GEN_FUNC": "record_trace_transform_gen_func",
- "_RECORD_BOUND_METHOD": "record_trace_transform_bound_method",
-}
-
# Recorder uops whose slot kind differs from the leading word of their name.
_RECORD_SLOT_KIND_OVERRIDES: dict[str, str] = {
"_RECORD_BOUND_METHOD": "CALLABLE",
class RecorderEmitter(Emitter):
- def __init__(self, out: CWriter):
+ def __init__(self, out: CWriter, target: str, incref: str):
super().__init__(out, {})
self._replacers["RECORD_VALUE"] = self.record_value
+ self.target = target
+ self.incref = incref
def record_value(
self,
storage: Storage,
inst: Instruction | None,
) -> bool:
- lparen = next(tkn_iter)
+ next(tkn_iter)
self.out.start_line()
- self.emit("*recorded_value = (PyObject *)")
+ self.emit(f"{self.target} = (PyObject *)")
emit_to(self.out, tkn_iter, "RPAREN")
next(tkn_iter) # Semi colon
self.emit(";\n")
- self.emit("Py_INCREF(*recorded_value);\n")
+ self.emit(f"{self.incref}({self.target});\n")
return True
def get_family_record_names(
- family_head: Instruction,
- family_members: list[Instruction],
+ family: Family,
instruction_records: dict[str, list[str]],
record_slot_keys: dict[str, str],
) -> list[str]:
+ """Return the recorder uop names the family records, one per slot kind.
+
+ Member recorders are visited first, then the family head's. The first
+ recorder seen for a slot kind claims that slot; `_RECORD_<kind>` is used
+ instead whenever that raw recorder appears among the family's own
+ recorders, or when two different recorders compete for the same slot.
+
+ Raises ValueError when recorders conflict on a slot kind and
+ `_RECORD_<kind>` is not a key of `record_slot_keys`.
+ """
- member_records = [instruction_records[m.name] for m in family_members]
- head_records = instruction_records[family_head.name]
+ # Every recorder name used by any member or by the family head itself.
+ family_record_names = set(family.get_member_record_names())
+ family_record_names.update(instruction_records[family.name])
records: list[str] = []
slot_index: dict[str, int] = {}
def add(name: str) -> None:
kind = record_slot_keys[name]
+ # Prefer the raw recorder if any family instruction uses it.
+ raw = f"_RECORD_{kind}"
+ source = raw if raw in family_record_names else name
existing = slot_index.get(kind)
if existing is None:
slot_index[kind] = len(records)
- records.append(name)
- elif records[existing] != name:
- raw = f"_RECORD_{kind}"
+ records.append(source)
+ elif records[existing] != source:
if raw not in record_slot_keys:
raise ValueError(
- f"Family {family_head.name} has incompatible recorders for "
- f"slot {kind}: {records[existing]} and {name}, "
+ f"Family {family.name} has incompatible recorders for "
+ f"slot {kind}: {records[existing]} and {source}, "
f"and no raw recorder {raw} exists to use as a base."
)
+ # Conflicting recorders share the slot by falling back to the raw one.
records[existing] = raw
- for names in member_records:
- for name in names:
+ for member in family.members:
+ for name in instruction_records[member.name]:
add(name)
- for name in head_records:
+ # Family head supplies any slots no member exercises, and may also
+ # conflict with members (resolved via the raw recorder above).
+ for name in instruction_records[family.name]:
add(name)
return records
source_records: list[str],
own_records: list[str],
record_slot_keys: dict[str, str],
-) -> tuple[list[int], int]:
+) -> tuple[list[int], int, list[str]]:
used = [False] * len(source_records)
slot_map: list[int] = []
transform_mask = 0
+ transform_names: list[str] = []
for i, own in enumerate(own_records):
own_kind = record_slot_keys[own]
for j, src in enumerate(source_records):
slot_map.append(j)
if src != own:
transform_mask |= 1 << i
+ if own not in transform_names:
+ transform_names.append(own)
break
else:
raise ValueError(
f"Instruction {inst_name} has no compatible family slot for "
f"{own} in {source_records}"
)
- return slot_map, transform_mask
+ return slot_map, transform_mask, transform_names
+
+
+def get_record_transform_input(uop: Uop) -> str:
+ """Return the name of the single used, non-array stack input of *uop*.
+
+ Raises ValueError if *uop* does not have exactly one used stack input,
+ or if that input is an array.
+ """
+ # Unused inputs are ignored; only variables flagged `used` count.
+ inputs = [var for var in uop.stack.inputs if var.used]
+ if len(inputs) != 1 or inputs[0].is_array():
+ raise ValueError(
+ f"Recorder transform for {uop.name} needs exactly one scalar input"
+ )
+ return inputs[0].name
+
+
+def generate_record_transform_function(uop: Uop, out: CWriter) -> None:
+ """Emit a static C function `_PyOpcode_RecordTransform<suffix>` for *uop*.
+
+ `<suffix>` is `uop.name` with its first seven characters removed. The
+ emitted function takes `PyObject *recorded_value`, assigns
+ `PyStackRef_FromPyObjectBorrow(recorded_value)` to the uop's single used
+ stack input, emits the uop's tokens through a RecorderEmitter targeting
+ `transformed_value` with `Py_XINCREF`, then emits
+ `Py_DECREF(recorded_value)` and returns `transformed_value`.
+
+ Raises ValueError (from get_record_transform_input) when *uop* does not
+ have exactly one used scalar input.
+ """
+ input_name = get_record_transform_input(uop)
+ out.emit("static PyObject *\n")
+ out.emit(f"_PyOpcode_RecordTransform{uop.name[7:]}(PyObject *recorded_value)\n")
+ out.emit("{\n")
+ # Starts as NULL, so that is what is returned if the body never assigns it.
+ out.emit("PyObject *transformed_value = NULL;\n")
+ for var in uop.stack.inputs:
+ if var.used:
+ declare_variable(var, out)
+ out.emit(f"{input_name} = PyStackRef_FromPyObjectBorrow(recorded_value);\n")
+ emitter = RecorderEmitter(out, "transformed_value", "Py_XINCREF")
+ emitter.emit_tokens(uop, Storage(Stack(), [], [], 0, False), None, False)
+ out.start_line()
+ # The DECREF is emitted after the uop body, so it follows any INCREF of
+ # the transformed value in the generated C.
+ out.emit("Py_DECREF(recorded_value);\n")
+ out.emit("return transformed_value;\n")
+ out.emit("}\n\n")
+
def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: CWriter) -> None:
write_header(__file__, filenames, out.out)
"""
)
args = "_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, int oparg, PyObject **recorded_value"
- emitter = RecorderEmitter(out)
+ emitter = RecorderEmitter(out, "*recorded_value", "Py_INCREF")
nop = analysis.instructions["NOP"]
for uop in analysis.uops.values():
if not uop.properties.records_value:
record_slot_keys = {name: get_record_slot_kind(name) for name in record_uop_names}
family_record_table = {
family.name: get_family_record_names(
- analysis.instructions[family.name],
- family.members,
+ family,
instruction_records,
record_slot_keys,
)
record_table: dict[str, list[str]] = {}
record_consumer_table: dict[str, tuple[list[int], int]] = {}
record_function_indexes: dict[str, int] = {}
+ record_transform_names: list[str] = []
for inst in analysis.instructions.values():
own_records = instruction_records[inst.name]
# TRACE_RECORD runs before execution, but specialization may rewrite
- # the opcode before translation. Record the raw family shape (union
- # of head + members) so any opcode in the family can be translated
- # from the same recorded layout.
+ # the opcode before translation. Use the shared family recording shape
+ # so any opcode in the family can be translated from the same layout.
family = inst.family or analysis.families.get(inst.name)
records = family_record_table[family.name] if family is not None else own_records
if not records:
if name not in record_function_indexes:
record_function_indexes[name] = len(record_function_indexes) + 1
if own_records:
- record_consumer_table[inst.name] = get_record_consumer_layout(
+ slots, mask, transform_names = get_record_consumer_layout(
inst.name, records, own_records, record_slot_keys
)
+ record_consumer_table[inst.name] = (slots, mask)
+ for name in transform_names:
+ if name not in record_transform_names:
+ record_transform_names.append(name)
for name, index in record_function_indexes.items():
out.emit(f"#define {name}_INDEX {index}\n")
for name in record_function_indexes:
out.emit(f" [{name}_INDEX] = _PyOpcode_RecordFunction{name[7:]},\n")
out.emit("};\n")
- generate_record_transform_dispatcher(record_uop_names, out)
+ out.emit("\n")
+ for name in record_transform_names:
+ generate_record_transform_function(analysis.uops[name], out)
+ generate_record_transform_dispatcher(record_transform_names, out)
def generate_record_transform_dispatcher(
- record_uop_names: list[str], out: CWriter
+ transform_names: list[str], out: CWriter
) -> None:
"""Emit a switch that converts a family-recorded value for a recorder uop.
- Only `_RECORD_*` uops that need conversion get a case; the default
- returns the input value unchanged. Helpers live in Python/optimizer.c.
+ Only `_RECORD_*` uops that need conversion get a case; the default returns
+ the input value unchanged.
"""
- cases: dict[str, list[str]] = {}
- for record_name in record_uop_names:
- helper = _RECORD_TRANSFORM_HELPERS.get(record_name)
- if helper is None:
- continue
- cases.setdefault(helper, []).append(record_name)
- out.emit("\n")
out.emit(
"PyObject *\n"
"_PyOpcode_RecordTransformValue(int uop, PyObject *value)\n"
"{\n"
)
- out.emit(" switch (uop) {\n")
- for helper, names in cases.items():
- for name in names:
- out.emit(f" case {name}:\n")
- out.emit(f" return {helper}(value);\n")
- out.emit(" default:\n")
- out.emit(" return value;\n")
- out.emit(" }\n")
+ # One case per name in transform_names. name[7:] drops the first seven
+ # characters (the "_RECORD" prefix for `_RECORD_*` names), which is the
+ # same suffix generate_record_transform_function uses for its C function.
+ out.emit("switch (uop) {\n")
+ for name in transform_names:
+ out.emit(f"case {name}:\n")
+ out.emit(f" return _PyOpcode_RecordTransform{name[7:]}(value);\n")
+ out.emit("default:\n")
+ out.emit(" return value;\n")
+ out.emit("}\n")
out.emit("}\n")