From: Hai Zhu Date: Tue, 14 Apr 2026 11:26:53 +0000 (+0800) Subject: gh-148378: Allow multiple consecutive recording ops per macro op (GH-148496) X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5ce0fe8b6ce01744ec253bf79016403ca76c1153;p=thirdparty%2FPython%2Fcpython.git gh-148378: Allow multiple consecutive recording ops per macro op (GH-148496) --- diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index e7b688333d9c..78bbdc2026e9 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -91,13 +91,15 @@ typedef struct _PyJitTracerInitialState { _Py_CODEUNIT *jump_backward_instr; } _PyJitTracerInitialState; +#define MAX_RECORDED_VALUES 3 typedef struct _PyJitTracerPreviousState { int instr_oparg; int instr_stacklevel; _Py_CODEUNIT *instr; PyCodeObject *instr_code; // Strong struct _PyInterpreterFrame *instr_frame; - PyObject *recorded_value; // Strong, may be NULL + PyObject *recorded_values[MAX_RECORDED_VALUES]; // Strong, may be NULL + int recorded_count; } _PyJitTracerPreviousState; typedef struct _PyJitTracerTranslatorState { @@ -481,7 +483,12 @@ void _PyJit_TracerFree(_PyThreadStateImpl *_tstate); #ifdef _Py_TIER2 typedef void (*_Py_RecordFuncPtr)(_PyInterpreterFrame *frame, _PyStackRef *stackpointer, int oparg, PyObject **recorded_value); PyAPI_DATA(const _Py_RecordFuncPtr) _PyOpcode_RecordFunctions[]; -PyAPI_DATA(const uint8_t) _PyOpcode_RecordFunctionIndices[256]; + +typedef struct { + uint8_t count; + uint8_t indices[MAX_RECORDED_VALUES]; +} _PyOpcodeRecordEntry; +PyAPI_DATA(const _PyOpcodeRecordEntry) _PyOpcode_RecordEntries[256]; #endif #ifdef __cplusplus diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 33fae682a3ce..62cf0c0c6af0 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -29,12 +29,13 @@ skip_if_different_mount_drives() test_tools.skip_if_missing("cases_generator") with test_tools.imports_under_tool("cases_generator"): - from analyzer import StackItem + from analyzer import StackItem, analyze_files from cwriter import CWriter import parser from stack import Local, Stack import tier1_generator import optimizer_generator + import record_function_generator def handle_stderr(): @@ -1948,6 +1949,202 @@ class TestGeneratedCases(unittest.TestCase): with self.assertRaisesRegex(SyntaxError, "Recording uop"): self.run_cases_test(input, "") + def test_multiple_consecutive_recording_uops(self): + """Multiple consecutive recording uops at the start of a macro are legal.""" + input = """ + tier2 op(_RECORD_A, (a, b -- a, b)) { + RECORD_VALUE(a); + } + tier2 op(_RECORD_B, (a, b -- a, b)) { + RECORD_VALUE(b); + } + op(_DO_STUFF, (a, b -- res)) { + res = a; + INPUTS_DEAD(); + } + macro(OP) = _RECORD_A + _RECORD_B + _DO_STUFF; + """ + output = """ + TARGET(OP) { + #if _Py_TAIL_CALL_INTERP + int opcode = OP; + (void)(opcode); + #endif + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP); + _PyStackRef a; + _PyStackRef res; + // _DO_STUFF + { + a = stack_pointer[-2]; + res = a; + } + stack_pointer[-2] = res; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + DISPATCH(); + } + """ + self.run_cases_test(input, output) + + def test_multiple_recording_uops_after_specializing(self): + """Multiple recording uops after a specializing uop are legal.""" + input = """ + specializing op(_SPECIALIZE_OP, (counter/1, a, b -- a, b)) { + SPAM(); + } + tier2 op(_RECORD_A, (a, b -- a, b)) { + RECORD_VALUE(a); + } + tier2 op(_RECORD_B, (a, b -- a, b)) { + RECORD_VALUE(b); + } + op(_DO_STUFF, (a, b -- res)) { + res = a; + INPUTS_DEAD(); + } + macro(OP) = _SPECIALIZE_OP + _RECORD_A + _RECORD_B + unused/2 + _DO_STUFF; + """ + output = """ + TARGET(OP) { + #if _Py_TAIL_CALL_INTERP + int opcode = OP; + (void)(opcode); + #endif + _Py_CODEUNIT* const this_instr = next_instr; + (void)this_instr; + frame->instr_ptr = next_instr; + next_instr += 4; + INSTRUCTION_STATS(OP); + _PyStackRef a; + _PyStackRef res; + // _SPECIALIZE_OP + { + uint16_t counter = read_u16(&this_instr[1].cache); + (void)counter; + SPAM(); + } + /* Skip 2 cache entries */ + // _DO_STUFF + { + a = stack_pointer[-2]; + res = a; + } + stack_pointer[-2] = res; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + DISPATCH(); + } + """ + self.run_cases_test(input, output) + + def test_recording_uop_between_real_uops_rejected(self): + """A recording uop sandwiched between real uops is rejected.""" + input = """ + tier2 op(_RECORD_A, (a, b -- a, b)) { + RECORD_VALUE(a); + } + op(_FIRST, (a, b -- a, b)) { + first(a); + } + tier2 op(_RECORD_B, (a, b -- a, b)) { + RECORD_VALUE(b); + } + macro(OP) = _RECORD_A + _FIRST + _RECORD_B; + """ + with self.assertRaisesRegex(SyntaxError, + "must precede all " + "non-recording, non-specializing uops"): + self.run_cases_test(input, "") + + +class TestRecorderTableGeneration(unittest.TestCase): + + def setUp(self) -> None: + super().setUp() + self.maxDiff = None + self.temp_dir = tempfile.gettempdir() + self.temp_input_filename = os.path.join(self.temp_dir, "input.txt") + + def tearDown(self) -> None: + try: + os.remove(self.temp_input_filename) + except FileNotFoundError: + pass + super().tearDown() + + def generate_tables(self, input: str) -> str: + import io + with open(self.temp_input_filename, "w+") as f: + f.write(parser.BEGIN_MARKER) + f.write(input) + f.write(parser.END_MARKER) + with handle_stderr(): + analysis = analyze_files([self.temp_input_filename]) + buf = io.StringIO() + out = CWriter(buf, 0, False) + record_function_generator.generate_recorder_tables(analysis, out) + return buf.getvalue() + + def test_single_recording_uop_generates_count(self): + input = """ + tier2 op(_RECORD_TOS, (value -- value)) { + RECORD_VALUE(value); + } + op(_DO_STUFF, (value -- res)) { + res = value; + } + macro(OP) = _RECORD_TOS + _DO_STUFF; + """ + output = self.generate_tables(input) + self.assertIn("_RECORD_TOS_INDEX", output) + self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output) + + def test_three_recording_uops_generate_count_3_in_order(self): + input = """ + tier2 op(_RECORD_X, (a, b, c -- a, b, c)) { + RECORD_VALUE(a); + } + tier2 op(_RECORD_Y, (a, b, c -- a, b, c)) { + RECORD_VALUE(b); + } + tier2 op(_RECORD_Z, (a, b, c -- a, b, c)) { + RECORD_VALUE(c); + } + op(_DO_STUFF, (a, b, c -- res)) { + res = a; + } + macro(OP) = _RECORD_X + _RECORD_Y + _RECORD_Z + _DO_STUFF; + """ + output = self.generate_tables(input) + self.assertIn( + "[OP] = {3, {_RECORD_X_INDEX, _RECORD_Y_INDEX, _RECORD_Z_INDEX}}", + output, + ) + + def test_four_recording_uops_rejected(self): + input = """ + tier2 op(_RECORD_A, (a, b, c, d -- a, b, c, d)) { + RECORD_VALUE(a); + } + tier2 op(_RECORD_B, (a, b, c, d -- a, b, c, d)) { + RECORD_VALUE(b); + } + tier2 op(_RECORD_C, (a, b, c, d -- a, b, c, d)) { + RECORD_VALUE(c); + } + tier2 op(_RECORD_D, (a, b, c, d -- a, b, c, d)) { + RECORD_VALUE(d); + } + op(_DO_STUFF, (a, b, c, d -- res)) { + res = a; + } + macro(OP) = _RECORD_A + _RECORD_B + _RECORD_C + _RECORD_D + _DO_STUFF; + """ + with self.assertRaisesRegex(ValueError, "exceeds MAX_RECORDED_VALUES"): + self.generate_tables(input) + class TestGeneratedAbstractCases(unittest.TestCase): def setUp(self) -> None: diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index 041adcff388a..8bf751026bd3 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -12317,9 +12317,12 @@ } DISPATCH(); } - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_CLEAR(tracer->prev_state.recorded_value); - stack_pointer = _PyFrame_GetStackPointer(frame); + for (int i = 0; i < tracer->prev_state.recorded_count; i++) { + _PyFrame_SetStackPointer(frame, stack_pointer); + Py_CLEAR(tracer->prev_state.recorded_values[i]); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + tracer->prev_state.recorded_count = 0; tracer->prev_state.instr = next_instr; PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable); if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) { @@ -12333,11 +12336,12 @@ if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) { (&next_instr[1])->counter = trigger_backoff_counter(); } - uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode]; - if (record_func_index) { - _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index]; - doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value); + const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode]; + for (int i = 0; i < record_entry->count; i++) { + _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]]; + doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]); } + tracer->prev_state.recorded_count = record_entry->count; DISPATCH_GOTO_NON_TRACING(); #else (void)prev_instr; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b6526b08c0a8..57731fc65ed3 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -6349,7 +6349,10 @@ dummy_func( ERROR_IF(err < 0); DISPATCH(); } - Py_CLEAR(tracer->prev_state.recorded_value); + for (int i = 0; i < tracer->prev_state.recorded_count; i++) { + Py_CLEAR(tracer->prev_state.recorded_values[i]); + } + tracer->prev_state.recorded_count = 0; tracer->prev_state.instr = next_instr; PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable); if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) { @@ -6363,11 +6366,12 @@ dummy_func( (&next_instr[1])->counter = trigger_backoff_counter(); } - uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode]; - if (record_func_index) { - _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index]; - doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value); + const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode]; + for (int i = 0; i < record_entry->count; i++) { + _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]]; + doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]); } + tracer->prev_state.recorded_count = record_entry->count; DISPATCH_GOTO_NON_TRACING(); #else (void)prev_instr; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ae01925077c2..d710e82a306a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -12314,9 +12314,12 @@ } DISPATCH(); } - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_CLEAR(tracer->prev_state.recorded_value); - stack_pointer = _PyFrame_GetStackPointer(frame); + for (int i = 0; i < tracer->prev_state.recorded_count; i++) { + _PyFrame_SetStackPointer(frame, stack_pointer); + Py_CLEAR(tracer->prev_state.recorded_values[i]); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + tracer->prev_state.recorded_count = 0; tracer->prev_state.instr = next_instr; PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable); if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) { @@ -12330,11 +12333,12 @@ if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) { (&next_instr[1])->counter = trigger_backoff_counter(); } - uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode]; - if (record_func_index) { - _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index]; - doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value); + const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode]; + for (int i = 0; i < record_entry->count; i++) { + _Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]]; + doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]); } + tracer->prev_state.recorded_count = record_entry->count; DISPATCH_GOTO_NON_TRACING(); #else (void)prev_instr; diff --git a/Python/optimizer.c b/Python/optimizer.c index f09bf778587b..5d5aecda4e45 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -866,6 +866,7 @@ _PyJit_translate_single_bytecode_to_trace( assert(nuops > 0); uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM uint32_t orig_target = target; + int record_idx = 0; for (int i = 0; i < nuops; i++) { oparg = orig_oparg; target = orig_target; @@ -946,8 +947,9 @@ _PyJit_translate_single_bytecode_to_trace( operand = next->op.arg; } else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) { - PyObject *recorded_value = tracer->prev_state.recorded_value; - tracer->prev_state.recorded_value = NULL; + PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx]; + tracer->prev_state.recorded_values[record_idx] = NULL; + record_idx++; operand = (uintptr_t)recorded_value; } // All other instructions @@ -1060,12 +1062,16 @@ _PyJit_TryInitializeTracing( tracer->prev_state.instr_frame = frame; tracer->prev_state.instr_oparg = oparg; tracer->prev_state.instr_stacklevel = tracer->initial_state.stack_depth; - tracer->prev_state.recorded_value = NULL; - uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[curr_instr->op.code]; - if (record_func_index) { - _Py_RecordFuncPtr record_func = _PyOpcode_RecordFunctions[record_func_index]; - record_func(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value); + tracer->prev_state.recorded_count = 0; + for (int i = 0; i < MAX_RECORDED_VALUES; i++) { + tracer->prev_state.recorded_values[i] = NULL; } + const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[curr_instr->op.code]; + for (int i = 0; i < record_entry->count; i++) { + _Py_RecordFuncPtr record_func = _PyOpcode_RecordFunctions[record_entry->indices[i]]; + record_func(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]); + } + tracer->prev_state.recorded_count = record_entry->count; assert(curr_instr->op.code == JUMP_BACKWARD_JIT || curr_instr->op.code == RESUME_CHECK_JIT || (exit != NULL)); tracer->initial_state.jump_backward_instr = curr_instr; @@ -1117,7 +1123,10 @@ _PyJit_FinalizeTracing(PyThreadState *tstate, int err) Py_CLEAR(tracer->initial_state.func); Py_CLEAR(tracer->initial_state.executor); Py_CLEAR(tracer->prev_state.instr_code); - Py_CLEAR(tracer->prev_state.recorded_value); + for (int i = 0; i < MAX_RECORDED_VALUES; i++) { + Py_CLEAR(tracer->prev_state.recorded_values[i]); + } + tracer->prev_state.recorded_count = 0; uop_buffer_init(buffer, &tracer->uop_array[0], UOP_MAX_TRACE_LENGTH); tracer->is_tracing = false; } diff --git a/Python/record_functions.c.h b/Python/record_functions.c.h index db03374b62f7..25bca4735726 100644 --- a/Python/record_functions.c.h +++ b/Python/record_functions.c.h @@ -99,41 +99,42 @@ void _PyOpcode_RecordFunction_CODE(_PyInterpreterFrame *frame, _PyStackRef *stac #define _RECORD_BOUND_METHOD_INDEX 6 #define _RECORD_CALLABLE_KW_INDEX 7 #define _RECORD_4OS_INDEX 8 -const uint8_t _PyOpcode_RecordFunctionIndices[256] = { - [TO_BOOL_ALWAYS_TRUE] = _RECORD_TOS_TYPE_INDEX, - [BINARY_OP_SUBSCR_GETITEM] = _RECORD_NOS_INDEX, - [SEND_GEN] = _RECORD_3OS_GEN_FUNC_INDEX, - [LOAD_SUPER_ATTR_METHOD] = _RECORD_NOS_INDEX, - [LOAD_ATTR_INSTANCE_VALUE] = _RECORD_TOS_TYPE_INDEX, - [LOAD_ATTR_WITH_HINT] = _RECORD_TOS_TYPE_INDEX, - [LOAD_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX, - [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = _RECORD_TOS_TYPE_INDEX, - [LOAD_ATTR_PROPERTY] = _RECORD_TOS_TYPE_INDEX, - [STORE_ATTR_INSTANCE_VALUE] = _RECORD_TOS_TYPE_INDEX, - [STORE_ATTR_WITH_HINT] = _RECORD_TOS_TYPE_INDEX, - [STORE_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX, - [FOR_ITER_GEN] = _RECORD_NOS_GEN_FUNC_INDEX, - [LOAD_ATTR_METHOD_WITH_VALUES] = _RECORD_TOS_TYPE_INDEX, - [LOAD_ATTR_METHOD_NO_DICT] = _RECORD_TOS_TYPE_INDEX, - [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = _RECORD_TOS_TYPE_INDEX, - [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = _RECORD_TOS_TYPE_INDEX, - [LOAD_ATTR_METHOD_LAZY_DICT] = _RECORD_TOS_TYPE_INDEX, - [CALL_PY_GENERAL] = _RECORD_CALLABLE_INDEX, - [CALL_BOUND_METHOD_GENERAL] = _RECORD_BOUND_METHOD_INDEX, - [CALL_NON_PY_GENERAL] = _RECORD_CALLABLE_INDEX, - [CALL_BOUND_METHOD_EXACT_ARGS] = _RECORD_BOUND_METHOD_INDEX, - [CALL_PY_EXACT_ARGS] = _RECORD_CALLABLE_INDEX, - [CALL_ALLOC_AND_ENTER_INIT] = _RECORD_CALLABLE_INDEX, - [CALL_BUILTIN_CLASS] = _RECORD_CALLABLE_INDEX, - [CALL_BUILTIN_O] = _RECORD_CALLABLE_INDEX, - [CALL_BUILTIN_FAST] = _RECORD_CALLABLE_INDEX, - [CALL_BUILTIN_FAST_WITH_KEYWORDS] = _RECORD_CALLABLE_INDEX, - [CALL_METHOD_DESCRIPTOR_O] = _RECORD_CALLABLE_INDEX, - [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = _RECORD_CALLABLE_INDEX, - [CALL_METHOD_DESCRIPTOR_NOARGS] = _RECORD_CALLABLE_INDEX, - [CALL_KW_PY] = _RECORD_CALLABLE_KW_INDEX, - [CALL_KW_BOUND_METHOD] = _RECORD_CALLABLE_KW_INDEX, - [CALL_EX_PY] = _RECORD_4OS_INDEX, + +const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = { + [TO_BOOL_ALWAYS_TRUE] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [BINARY_OP_SUBSCR_GETITEM] = {1, {_RECORD_NOS_INDEX}}, + [SEND_GEN] = {1, {_RECORD_3OS_GEN_FUNC_INDEX}}, + [LOAD_SUPER_ATTR_METHOD] = {1, {_RECORD_NOS_INDEX}}, + [LOAD_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_PROPERTY] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [STORE_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [STORE_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [STORE_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [FOR_ITER_GEN] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}}, + [LOAD_ATTR_METHOD_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_METHOD_NO_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [LOAD_ATTR_METHOD_LAZY_DICT] = {1, {_RECORD_TOS_TYPE_INDEX}}, + [CALL_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_BOUND_METHOD_GENERAL] = {1, {_RECORD_BOUND_METHOD_INDEX}}, + [CALL_NON_PY_GENERAL] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_BOUND_METHOD_EXACT_ARGS] = {1, {_RECORD_BOUND_METHOD_INDEX}}, + [CALL_PY_EXACT_ARGS] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_ALLOC_AND_ENTER_INIT] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_BUILTIN_CLASS] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_BUILTIN_O] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_BUILTIN_FAST] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_METHOD_DESCRIPTOR_O] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_METHOD_DESCRIPTOR_NOARGS] = {1, {_RECORD_CALLABLE_INDEX}}, + [CALL_KW_PY] = {1, {_RECORD_CALLABLE_KW_INDEX}}, + [CALL_KW_BOUND_METHOD] = {1, {_RECORD_CALLABLE_KW_INDEX}}, + [CALL_EX_PY] = {1, {_RECORD_4OS_INDEX}}, }; const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[9] = { diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 100de4c72509..414ca18be465 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -1132,9 +1132,7 @@ def add_macro( macro: parser.Macro, instructions: dict[str, Instruction], uops: dict[str, Uop] ) -> None: parts: list[Part] = [] - # Track the last non-specializing uop seen, so that recording uops - # can follow specializing ones without triggering the position check. - prev_uop: Uop | None = None + seen_real_uop = False for part in macro.uops: match part: case parser.OpName(): @@ -1146,14 +1144,15 @@ def add_macro( f"No Uop named {part.name}", macro.tokens[0] ) uop = uops[part.name] - if uop.properties.records_value and prev_uop is not None: - raise analysis_error( - f"Recording uop {part.name} is not allowed " - f"after non-specializing uops in macro", - macro.tokens[0]) + if uop.properties.records_value: + if seen_real_uop: + raise analysis_error( + f"Recording uop {part.name} must precede all " + f"non-recording, non-specializing uops in macro", + macro.tokens[0]) + elif "specializing" not in uop.annotations: + seen_real_uop = True parts.append(uop) - if "specializing" not in uop.annotations: - prev_uop = uop case parser.CacheEffect(): parts.append(Skip(part.size)) case _: diff --git a/Tools/cases_generator/record_function_generator.py b/Tools/cases_generator/record_function_generator.py index 58d948f198c4..d7ae0ebf79fe 100644 --- a/Tools/cases_generator/record_function_generator.py +++ b/Tools/cases_generator/record_function_generator.py @@ -25,6 +25,9 @@ from stack import Stack, Storage DEFAULT_OUTPUT = ROOT / "Python/recorder_functions.c.h" +# Must match MAX_RECORDED_VALUES in Include/internal/pycore_optimizer.h. +MAX_RECORDED_VALUES = 3 + class RecorderEmitter(Emitter): def __init__(self, out: CWriter): @@ -81,27 +84,35 @@ def generate_recorder_functions(filenames: list[str], analysis: Analysis, out: C def generate_recorder_tables(analysis: Analysis, out: CWriter) -> None: record_function_indexes: dict[str, int] = dict() - record_table: dict[str, str] = {} + record_table: dict[str, list[str]] = {} index = 1 for inst in analysis.instructions.values(): if not inst.properties.records_value: continue + records: list[str] = [] for part in inst.parts: if not part.properties.records_value: continue if part.name not in record_function_indexes: record_function_indexes[part.name] = index index += 1 - record_table[inst.name] = part.name - break + records.append(part.name) + if records: + if len(records) > MAX_RECORDED_VALUES: + raise ValueError( + f"Instruction {inst.name} has {len(records)} recording ops, " + f"exceeds MAX_RECORDED_VALUES ({MAX_RECORDED_VALUES})" + ) + record_table[inst.name] = records func_count = len(record_function_indexes) for name, index in record_function_indexes.items(): out.emit(f"#define {name}_INDEX {index}\n") - args = "_PyJitTracerState *tracer, _PyInterpreterFrame *frame, _PyStackRef *stackpointer, int oparg" - out.emit("const uint8_t _PyOpcode_RecordFunctionIndices[256] = {\n") - for inst_name, record_name in record_table.items(): - out.emit(f" [{inst_name}] = {record_name}_INDEX,\n") + out.emit("\n") + out.emit("const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {\n") + for inst_name, record_names in record_table.items(): + indices = ", ".join(f"{name}_INDEX" for name in record_names) + out.emit(f" [{inst_name}] = {{{len(record_names)}, {{{indices}}}}},\n") out.emit("};\n\n") out.emit(f"const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[{func_count+1}] = {{\n") out.emit(" [0] = NULL,\n")