From a154c9ed4e8efff75e7a683f217d728f108b73dd Mon Sep 17 00:00:00 2001 From: Nadeshiko Manju Date: Sun, 14 Dec 2025 17:33:05 +0800 Subject: [PATCH] gh-134584: Eliminate redundant refcounting from `_CALL_STR_1` (GH-136070) Signed-off-by: Manjusaka --- Include/internal/pycore_opcode_metadata.h | 4 +-- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 8 +++--- Lib/test/test_capi/test_opt.py | 14 +++++++++++ ...-06-28-17-54-27.gh-issue-134584.EXgPub.rst | 1 + Python/bytecodes.c | 14 +++++------ Python/executor_cases.c.h | 25 +++++++------------ Python/generated_cases.c.h | 23 +++++++++-------- Python/optimizer_bytecodes.c | 3 ++- Python/optimizer_cases.c.h | 7 ++++-- 10 files changed, 58 insertions(+), 43 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-28-17-54-27.gh-issue-134584.EXgPub.rst diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 82c7cf486b1a..ec2eb15151c9 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1129,7 +1129,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [CALL_NON_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_SYNC_SP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_SYNC_SP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, - [CALL_STR_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_STR_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_TUPLE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_TYPE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [CHECK_EG_MATCH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1375,7 +1375,7 @@ _PyOpcode_macro_expansion[256] = { [CALL_NON_PY_GENERAL] = { .nuops = 3, .uops = { { _CHECK_IS_NOT_PY_CALLABLE, OPARG_SIMPLE, 3 }, { _CALL_NON_PY_GENERAL, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_PY_EXACT_ARGS] = { .nuops = 8, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_PY_GENERAL] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, - [CALL_STR_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_STR_1, OPARG_SIMPLE, 3 }, { _CALL_STR_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, + [CALL_STR_1] = { .nuops = 5, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_STR_1, OPARG_SIMPLE, 3 }, { _CALL_STR_1, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_TUPLE_1] = { .nuops = 5, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TUPLE_1, OPARG_SIMPLE, 3 }, { _CALL_TUPLE_1, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_TYPE_1] = { .nuops = 3, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TYPE_1, OPARG_SIMPLE, 3 }, { _CALL_TYPE_1, OPARG_SIMPLE, 3 } } }, [CHECK_EG_MATCH] = { .nuops = 1, .uops = { { _CHECK_EG_MATCH, OPARG_SIMPLE, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index ce1ad5a4c8a5..eb8e915e6d9a 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -438,7 +438,7 @@ extern "C" { #define _CALL_METHOD_DESCRIPTOR_NOARGS_r01 631 #define _CALL_METHOD_DESCRIPTOR_O_r01 632 #define _CALL_NON_PY_GENERAL_r01 633 -#define _CALL_STR_1_r31 634 +#define _CALL_STR_1_r32 634 #define _CALL_TUPLE_1_r32 635 #define _CALL_TYPE_1_r31 636 #define _CHECK_AND_ALLOCATE_OBJECT_r00 637 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 0a37ce98c7eb..3bd19c82bc4e 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -274,7 +274,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_CALLABLE_TYPE_1] = HAS_DEOPT_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_GUARD_CALLABLE_STR_1] = HAS_DEOPT_FLAG, - [_CALL_STR_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_STR_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_GUARD_CALLABLE_TUPLE_1] = HAS_DEOPT_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CHECK_AND_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, @@ -2525,7 +2525,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 0, _CALL_STR_1_r31 }, + { 2, 0, _CALL_STR_1_r32 }, }, }, [_GUARD_CALLABLE_TUPLE_1] = { @@ -3734,7 +3734,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_GUARD_CALLABLE_STR_1_r13] = _GUARD_CALLABLE_STR_1, [_GUARD_CALLABLE_STR_1_r23] = _GUARD_CALLABLE_STR_1, [_GUARD_CALLABLE_STR_1_r33] = _GUARD_CALLABLE_STR_1, - [_CALL_STR_1_r31] = _CALL_STR_1, + [_CALL_STR_1_r32] = _CALL_STR_1, [_GUARD_CALLABLE_TUPLE_1_r03] = _GUARD_CALLABLE_TUPLE_1, [_GUARD_CALLABLE_TUPLE_1_r13] = _GUARD_CALLABLE_TUPLE_1, [_GUARD_CALLABLE_TUPLE_1_r23] = _GUARD_CALLABLE_TUPLE_1, @@ -4056,7 +4056,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_CALL_NON_PY_GENERAL] = "_CALL_NON_PY_GENERAL", [_CALL_NON_PY_GENERAL_r01] = "_CALL_NON_PY_GENERAL_r01", [_CALL_STR_1] = "_CALL_STR_1", - [_CALL_STR_1_r31] = "_CALL_STR_1_r31", + [_CALL_STR_1_r32] = "_CALL_STR_1_r32", [_CALL_TUPLE_1] = "_CALL_TUPLE_1", [_CALL_TUPLE_1_r32] = "_CALL_TUPLE_1_r32", [_CALL_TYPE_1] = "_CALL_TYPE_1", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0e7fd62c28a0..bd48db537a2f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1957,6 +1957,20 @@ class TestUopsOptimization(unittest.TestCase): self.assertNotIn("_GUARD_NOS_NULL", uops) self.assertNotIn("_GUARD_CALLABLE_STR_1", uops) + def test_call_str_1_pop_top(self): + def testfunc(n): + x = 0 + for _ in range(n): + t = str("") + x += 1 if len(t) == 0 else 0 + return x + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_CALL_STR_1", uops) + self.assertIn("_POP_TOP_NOP", uops) + def test_call_str_1_result_is_str(self): def testfunc(n): x = 0 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-28-17-54-27.gh-issue-134584.EXgPub.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-28-17-54-27.gh-issue-134584.EXgPub.rst new file mode 100644 index 000000000000..638c170e262c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-28-17-54-27.gh-issue-134584.EXgPub.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting from ``_CALL_STR_1``. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d96a1944cd51..3e76dcd77561 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4040,18 +4040,17 @@ dummy_func( DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type); } - op(_CALL_STR_1, (callable, null, arg -- res)) { + op(_CALL_STR_1, (callable, null, arg -- res, a)) { PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); PyObject *res_o = PyObject_Str(arg_o); - DEAD(null); - DEAD(callable); - (void)callable; // Silence compiler warnings about unused variables - (void)null; - PyStackRef_CLOSE(arg); - ERROR_IF(res_o == NULL); + if (res_o == NULL) { + ERROR_NO_POP(); + } + a = arg; + INPUTS_DEAD(); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4061,6 +4060,7 @@ dummy_func( _GUARD_NOS_NULL + _GUARD_CALLABLE_STR_1 + _CALL_STR_1 + + POP_TOP + _CHECK_PERIODIC_AT_END; op(_GUARD_CALLABLE_TUPLE_1, (callable, unused, unused -- callable, unused, unused)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fab23f358682..ff73b55bc6aa 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -12755,47 +12755,40 @@ break; } - case _CALL_STR_1_r31: { + case _CALL_STR_1_r32: { CHECK_CURRENT_CACHED_VALUES(3); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); _PyStackRef arg; - _PyStackRef null; - _PyStackRef callable; _PyStackRef res; + _PyStackRef a; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; _PyStackRef _stack_item_2 = _tos_cache2; oparg = CURRENT_OPARG(); arg = _stack_item_2; - null = _stack_item_1; - callable = _stack_item_0; PyObject *arg_o = PyStackRef_AsPyObjectBorrow(arg); assert(oparg == 1); STAT_INC(CALL, hit); - stack_pointer[0] = callable; - stack_pointer[1] = null; + stack_pointer[0] = _stack_item_0; + stack_pointer[1] = _stack_item_1; stack_pointer[2] = arg; stack_pointer += 3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); - (void)callable; - (void)null; - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); - stack_pointer = _PyFrame_GetStackPointer(frame); if (res_o == NULL) { SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } + a = arg; res = PyStackRef_FromPyObjectSteal(res_o); + _tos_cache1 = a; _tos_cache0 = res; - _tos_cache1 = PyStackRef_ZERO_BITS; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); + SET_CURRENT_CACHED_VALUES(2); + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 413593386583..1d8c01bd1b35 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3968,6 +3968,8 @@ _PyStackRef callable; _PyStackRef arg; _PyStackRef res; + _PyStackRef a; + _PyStackRef value; /* Skip 1 cache entry */ /* Skip 2 cache entries */ // _GUARD_NOS_NULL @@ -3998,23 +4000,24 @@ _PyFrame_SetStackPointer(frame, stack_pointer); PyObject *res_o = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); - (void)callable; - (void)null; - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(arg); - stack_pointer = _PyFrame_GetStackPointer(frame); if (res_o == NULL) { JUMP_TO_LABEL(error); } + a = arg; res = PyStackRef_FromPyObjectSteal(res_o); } - // _CHECK_PERIODIC_AT_END + // _POP_TOP { - stack_pointer[0] = res; - stack_pointer += 1; + value = a; + stack_pointer[-3] = res; + stack_pointer += -2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _CHECK_PERIODIC_AT_END + { _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 9e767464500d..071f4ca31ba7 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -951,7 +951,7 @@ dummy_func(void) { } } - op(_CALL_STR_1, (unused, unused, arg -- res)) { + op(_CALL_STR_1, (unused, unused, arg -- res, a)) { if (sym_matches_type(arg, &PyUnicode_Type)) { // e.g. str('foo') or str(foo) where foo is known to be a string // Note: we must strip the reference information because it goes @@ -961,6 +961,7 @@ dummy_func(void) { else { res = sym_new_type(ctx, &PyUnicode_Type); } + a = arg; } op(_CALL_ISINSTANCE, (unused, unused, instance, cls -- res)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index c3f122215ed7..d3cff14be70a 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2855,6 +2855,7 @@ case _CALL_STR_1: { JitOptRef arg; JitOptRef res; + JitOptRef a; arg = stack_pointer[-1]; if (sym_matches_type(arg, &PyUnicode_Type)) { res = PyJitRef_StripReferenceInfo(arg); @@ -2862,9 +2863,11 @@ else { res = sym_new_type(ctx, &PyUnicode_Type); } - CHECK_STACK_BOUNDS(-2); + a = arg; + CHECK_STACK_BOUNDS(-1); stack_pointer[-3] = res; - stack_pointer += -2; + stack_pointer[-2] = a; + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } -- 2.47.3