From: Savannah Bailey Date: Mon, 15 Sep 2025 15:29:45 +0000 (+0100) Subject: GH-132732: Use pure op machinery to optimize various instructions with `_POP_TOP... X-Git-Tag: v3.15.0a1~359 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9c9a0f7da7bf626b6d156c9fe3df22597ee3fe9e;p=thirdparty%2FPython%2Fcpython.git GH-132732: Use pure op machinery to optimize various instructions with `_POP_TOP` and `_POP_TWO` (#137577) --- diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index ffd65dbb1464..9601cedfe56f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1614,6 +1614,74 @@ class TestUopsOptimization(unittest.TestCase): # But all of the appends we care about are still there: self.assertEqual(uops.count("_CALL_LIST_APPEND"), len("ABCDEFG")) + def test_unary_negative_pop_top_load_const_inline_borrow(self): + def testfunc(n): + x = 0 + for i in range(n): + a = 1 + result = -a + if result < 0: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_UNARY_NEGATIVE", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + + def test_unary_not_pop_top_load_const_inline_borrow(self): + def testfunc(n): + x = 0 + for i in range(n): + a = 42 + result = not a + if result: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 0) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_UNARY_NOT", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + + def test_unary_invert_pop_top_load_const_inline_borrow(self): + def testfunc(n): + x = 0 + for i in range(n): + a = 0 + result = ~a + if result < 0: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_UNARY_INVERT", uops) + self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops) + + def test_compare_op_pop_two_load_const_inline_borrow(self): + def testfunc(n): + x = 0 + for _ in range(n): + a = 10 + b = 10.0 + if a == b: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_COMPARE_OP", uops) + self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) + def test_compare_op_int_pop_two_load_const_inline_borrow(self): def testfunc(n): x = 0 @@ -1665,6 +1733,23 @@ class TestUopsOptimization(unittest.TestCase): self.assertNotIn("_COMPARE_OP_FLOAT", uops) self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) + def test_contains_op_pop_two_load_const_inline_borrow(self): + def testfunc(n): + x = 0 + for _ in range(n): + a = "foo" + s = "foo bar baz" + if a in s: + x += 1 + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_CONTAINS_OP", uops) + self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) + def test_to_bool_bool_contains_op_set(self): """ Test that _TO_BOOL_BOOL is removed from code like: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-09-04-07-05.gh-issue-132732.8BiIVJ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-09-04-07-05.gh-issue-132732.8BiIVJ.rst new file mode 100644 index 000000000000..c1fa14e0566e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-09-04-07-05.gh-issue-132732.8BiIVJ.rst @@ -0,0 +1 @@ +Optimize ``_COMPARE_OP``, ``_CONTAINS_OP``, ``_UNARY_NEGATIVE``, ``_UNARY_NOT``, and ``_UNARY_INVERT`` in JIT builds with constant-loading uops (``_POP_TWO_LOAD_CONST_INLINE_BORROW`` and ``_POP_TOP_LOAD_CONST_INLINE_BORROW``), and then remove both to reduce instruction count. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index eccbddf0546a..8f719f5750bd 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -397,6 +397,7 @@ dummy_func(void) { } op(_UNARY_NEGATIVE, (value -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(value); if (sym_is_compact_int(value)) { res = sym_new_compact_int(ctx); } @@ -412,6 +413,10 @@ dummy_func(void) { } op(_UNARY_INVERT, (value -- res)) { + // Required to avoid a warning due to the deprecation of bitwise inversion of bools + if (!sym_matches_type(value, &PyBool_Type)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(value); + } if (sym_matches_type(value, &PyLong_Type)) { res = sym_new_type(ctx, &PyLong_Type); } @@ -421,6 +426,9 @@ dummy_func(void) { } op(_COMPARE_OP, (left, right -- res)) { + // Comparison between bytes and str or int is not impacted by this optimization as bytes + // is not a safe type (due to its ability to raise a warning during comparisons). + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); if (oparg & 16) { res = sym_new_type(ctx, &PyBool_Type); } @@ -449,6 +457,7 @@ dummy_func(void) { } op(_CONTAINS_OP, (left, right -- b)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); b = sym_new_type(ctx, &PyBool_Type); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 8617355e25f4..99601b016acc 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -188,6 +188,31 @@ JitOptRef value; JitOptRef res; value = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, value) + ) { + JitOptRef value_sym = value; + _PyStackRef value = sym_get_const_as_stackref(ctx, value_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *res_o = PyNumber_Negative(PyStackRef_AsPyObjectBorrow(value)); + PyStackRef_CLOSE(value); + if (res_o == NULL) { + goto error; + } + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TOP_LOAD_CONST_INLINE_BORROW since we have one input and an immortal result + REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } + stack_pointer[-1] = res; + break; + } if (sym_is_compact_int(value)) { res = sym_new_compact_int(ctx); } @@ -220,6 +245,13 @@ ? PyStackRef_True : PyStackRef_False; /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TOP_LOAD_CONST_INLINE_BORROW since we have one input and an immortal result + REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } stack_pointer[-1] = res; break; } @@ -359,6 +391,33 @@ JitOptRef value; JitOptRef res; value = stack_pointer[-1]; + if (!sym_matches_type(value, &PyBool_Type)) { + if ( + sym_is_safe_const(ctx, value) + ) { + JitOptRef value_sym = value; + _PyStackRef value = sym_get_const_as_stackref(ctx, value_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *res_o = PyNumber_Invert(PyStackRef_AsPyObjectBorrow(value)); + PyStackRef_CLOSE(value); + if (res_o == NULL) { + goto error; + } + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TOP_LOAD_CONST_INLINE_BORROW since we have one input and an immortal result + REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } + stack_pointer[-1] = res; + break; + } + } if (sym_matches_type(value, &PyLong_Type)) { res = sym_new_type(ctx, &PyLong_Type); } @@ -438,7 +497,6 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -489,7 +547,6 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -540,7 +597,6 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -610,7 +666,6 @@ } /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -663,7 +718,6 @@ } /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -716,7 +770,6 @@ } /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -796,7 +849,6 @@ res_stackref = PyStackRef_FromPyObjectSteal(res_o); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -1642,7 +1694,53 @@ } case _COMPARE_OP: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert((oparg >> 5) <= Py_GE); + PyObject *res_o = PyObject_RichCompare(left_o, right_o, oparg >> 5); + if (res_o == NULL) { + goto error; + } + if (oparg & 16) { + int res_bool = PyObject_IsTrue(res_o); + Py_DECREF(res_o); + if (res_bool < 0) { + goto error; + } + res_stackref = res_bool ? PyStackRef_True : PyStackRef_False; + } + else { + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + } + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + if (sym_is_const(ctx, res)) { + PyObject *result = sym_get_const(ctx, res); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } if (oparg & 16) { res = sym_new_type(ctx, &PyBool_Type); } @@ -1682,7 +1780,6 @@ res_stackref = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -1733,7 +1830,6 @@ res_stackref = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -1782,7 +1878,6 @@ res_stackref = ((COMPARISON_NOT_EQUALS + eq) & oparg) ? PyStackRef_True : PyStackRef_False; /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { @@ -1812,7 +1907,42 @@ } case _CONTAINS_OP: { + JitOptRef right; + JitOptRef left; JitOptRef b; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef b_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + int res = PySequence_Contains(right_o, left_o); + if (res < 0) { + goto error; + } + b_stackref = (res ^ oparg) ? PyStackRef_True : PyStackRef_False; + /* End of uop copied from bytecodes for constant evaluation */ + b = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(b_stackref)); + if (sym_is_const(ctx, b)) { + PyObject *result = sym_get_const(ctx, b); + if (_Py_IsImmortal(result)) { + // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + } + } + stack_pointer[-2] = b; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } b = sym_new_type(ctx, &PyBool_Type); stack_pointer[-2] = b; stack_pointer += -1; @@ -2885,7 +3015,6 @@ res_stackref = PyStackRef_FromPyObjectSteal(res_o); /* End of uop copied from bytecodes for constant evaluation */ res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); - if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index b9985eaf4830..7486fca245f5 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -4,6 +4,7 @@ Writes the cases to optimizer_cases.c.h, which is #included in Python/optimizer_ """ import argparse +import textwrap from analyzer import ( Analysis, @@ -190,6 +191,7 @@ class OptimizerEmitter(Emitter): input_identifiers_as_str = {tkn.text for tkn in input_identifiers} used_stack_inputs = [inp for inp in uop.stack.inputs if inp.name in input_identifiers_as_str] assert len(used_stack_inputs) > 0 + self.out.start_line() emitter = OptimizerConstantEmitter(self.out, {}, self.original_uop, self.stack.copy()) emitter.emit("if (\n") for inp in used_stack_inputs[:-1]: @@ -232,18 +234,28 @@ class OptimizerEmitter(Emitter): emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n") else: emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n") - - if len(used_stack_inputs) == 2 and len(self.original_uop.stack.outputs) == 1: - outp = self.original_uop.stack.outputs[0] - if not outp.peek: - emitter.emit(f""" - if (sym_is_const(ctx, {outp.name})) {{ - PyObject *result = sym_get_const(ctx, {outp.name}); - if (_Py_IsImmortal(result)) {{ - // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result - REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); - }} - }}""") + if len(self.original_uop.stack.outputs) == 1: + outp = self.original_uop.stack.outputs[0] + if not outp.peek: + if self.original_uop.name.startswith('_'): + # Map input count to the appropriate constant-loading uop + input_count_to_uop = { + 1: "_POP_TOP_LOAD_CONST_INLINE_BORROW", + 2: "_POP_TWO_LOAD_CONST_INLINE_BORROW" + } + + input_count = len(used_stack_inputs) + if input_count in input_count_to_uop: + replacement_uop = input_count_to_uop[input_count] + input_desc = "one input" if input_count == 1 else "two inputs" + + emitter.emit(f"if (sym_is_const(ctx, {outp.name})) {{\n") + emitter.emit(f"PyObject *result = sym_get_const(ctx, {outp.name});\n") + emitter.emit(f"if (_Py_IsImmortal(result)) {{\n") + emitter.emit(f"// Replace with {replacement_uop} since we have {input_desc} and an immortal result\n") + emitter.emit(f"REPLACE_OP(this_instr, {replacement_uop}, 0, (uintptr_t)result);\n") + emitter.emit("}\n") + emitter.emit("}\n") storage.flush(self.out) emitter.emit("break;\n")