From: Donghee Na Date: Sun, 14 Dec 2025 12:26:00 +0000 (+0900) Subject: gh-134584: Eliminate redundant refcounting from _STORE_SUBSCR_LIST_INT (gh-142703) X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=04da416e6b97e8a673116644efa9b898c8101849;p=thirdparty%2FPython%2Fcpython.git gh-134584: Eliminate redundant refcounting from _STORE_SUBSCR_LIST_INT (gh-142703) --- diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 0fb01982b6aa..0a29fabe7676 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1494,7 +1494,7 @@ _PyOpcode_macro_expansion[256] = { [STORE_SLICE] = { .nuops = 1, .uops = { { _STORE_SLICE, OPARG_SIMPLE, 0 } } }, [STORE_SUBSCR] = { .nuops = 1, .uops = { { _STORE_SUBSCR, OPARG_SIMPLE, 0 } } }, [STORE_SUBSCR_DICT] = { .nuops = 2, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_DICT, OPARG_SIMPLE, 1 } } }, - [STORE_SUBSCR_LIST_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_LIST_INT, OPARG_SIMPLE, 1 } } }, + [STORE_SUBSCR_LIST_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_LIST_INT, OPARG_SIMPLE, 1 }, { _POP_TOP_INT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, [SWAP] = { .nuops = 1, .uops = { { _SWAP, OPARG_SIMPLE, 0 } } }, [TO_BOOL] = { .nuops = 1, .uops = { { _TO_BOOL, OPARG_SIMPLE, 2 } } }, [TO_BOOL_ALWAYS_TRUE] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _REPLACE_WITH_TRUE, OPARG_SIMPLE, 3 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index e871e7699bf7..64e51bd2b8bb 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -1068,7 +1068,7 @@ extern "C" { #define _STORE_SLICE_r30 1261 #define _STORE_SUBSCR_r30 1262 #define _STORE_SUBSCR_DICT_r30 1263 -#define _STORE_SUBSCR_LIST_INT_r30 1264 +#define _STORE_SUBSCR_LIST_INT_r32 1264 #define _SWAP_r11 1265 #define _SWAP_2_r02 1266 #define _SWAP_2_r12 1267 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 5ae05358592a..5fa375a8ce6b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -1265,7 +1265,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 }, - { 0, 3, _STORE_SUBSCR_LIST_INT_r30 }, + { 2, 3, _STORE_SUBSCR_LIST_INT_r32 }, }, }, [_STORE_SUBSCR_DICT] = { @@ -3498,7 +3498,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_LIST_APPEND_r10] = _LIST_APPEND, [_SET_ADD_r10] = _SET_ADD, [_STORE_SUBSCR_r30] = _STORE_SUBSCR, - [_STORE_SUBSCR_LIST_INT_r30] = _STORE_SUBSCR_LIST_INT, + [_STORE_SUBSCR_LIST_INT_r32] = _STORE_SUBSCR_LIST_INT, [_STORE_SUBSCR_DICT_r30] = _STORE_SUBSCR_DICT, [_DELETE_SUBSCR_r20] = _DELETE_SUBSCR, [_CALL_INTRINSIC_1_r11] = _CALL_INTRINSIC_1, @@ -4869,7 +4869,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT", [_STORE_SUBSCR_DICT_r30] = "_STORE_SUBSCR_DICT_r30", [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT", - [_STORE_SUBSCR_LIST_INT_r30] = "_STORE_SUBSCR_LIST_INT_r30", + [_STORE_SUBSCR_LIST_INT_r32] = "_STORE_SUBSCR_LIST_INT_r32", [_SWAP] = "_SWAP", [_SWAP_r11] = "_SWAP_r11", [_SWAP_2] = "_SWAP_2", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 32c00b57e1a4..0f6ed3d85f03 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2499,6 +2499,24 @@ class TestUopsOptimization(unittest.TestCase): self.assertNotIn("_GUARD_TOS_INT", uops) self.assertNotIn("_GUARD_NOS_INT", uops) + def test_store_subscr_int(self): + def testfunc(n): + l = [0, 0, 0, 0] + for _ in range(n): + l[0] = 1 + l[1] = 2 + l[2] = 3 + l[3] = 4 + return sum(l) + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 10) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_POP_TOP", uops) + self.assertNotIn("_POP_TOP_INT", uops) + self.assertIn("_POP_TOP_NOP", uops) + def test_attr_promotion_failure(self): # We're not testing for any specific uops here, just # testing it doesn't crash. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7e78e726b911..daa3d218e387 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1126,9 +1126,9 @@ dummy_func( macro(STORE_SUBSCR) = _SPECIALIZE_STORE_SUBSCR + _STORE_SUBSCR; macro(STORE_SUBSCR_LIST_INT) = - _GUARD_TOS_INT + _GUARD_NOS_LIST + unused/1 + _STORE_SUBSCR_LIST_INT; + _GUARD_TOS_INT + _GUARD_NOS_LIST + unused/1 + _STORE_SUBSCR_LIST_INT + _POP_TOP_INT + POP_TOP; - op(_STORE_SUBSCR_LIST_INT, (value, list_st, sub_st -- )) { + op(_STORE_SUBSCR_LIST_INT, (value, list_st, sub_st -- ls, ss)) { PyObject *sub = PyStackRef_AsPyObjectBorrow(sub_st); PyObject *list = PyStackRef_AsPyObjectBorrow(list_st); @@ -1151,9 +1151,9 @@ dummy_func( PyStackRef_AsPyObjectSteal(value)); assert(old_value != NULL); UNLOCK_OBJECT(list); // unlock before decrefs! - PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); - DEAD(sub_st); - PyStackRef_CLOSE(list_st); + INPUTS_DEAD(); + ls = list_st; + ss = sub_st; Py_DECREF(old_value); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 727d3695bb11..2a1156091e3d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5703,12 +5703,14 @@ break; } - case _STORE_SUBSCR_LIST_INT_r30: { + case _STORE_SUBSCR_LIST_INT_r32: { CHECK_CURRENT_CACHED_VALUES(3); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); _PyStackRef sub_st; _PyStackRef list_st; _PyStackRef value; + _PyStackRef ls; + _PyStackRef ss; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; _PyStackRef _stack_item_2 = _tos_cache2; @@ -5744,15 +5746,21 @@ PyStackRef_AsPyObjectSteal(value)); assert(old_value != NULL); UNLOCK_OBJECT(list); - PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); + ls = list_st; + ss = sub_st; + stack_pointer[0] = ls; + stack_pointer[1] = ss; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(list_st); Py_DECREF(old_value); stack_pointer = _PyFrame_GetStackPointer(frame); - _tos_cache0 = PyStackRef_ZERO_BITS; - _tos_cache1 = PyStackRef_ZERO_BITS; + _tos_cache1 = ss; + _tos_cache0 = ls; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(0); + SET_CURRENT_CACHED_VALUES(2); + stack_pointer += -2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e1860f5d9d8f..ab9373e0af5a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -11024,6 +11024,8 @@ _PyStackRef nos; _PyStackRef list_st; _PyStackRef sub_st; + _PyStackRef ls; + _PyStackRef ss; // _GUARD_TOS_INT { value = stack_pointer[-1]; @@ -11079,14 +11081,31 @@ PyStackRef_AsPyObjectSteal(value)); assert(old_value != NULL); UNLOCK_OBJECT(list); - PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); - stack_pointer += -3; + ls = list_st; + ss = sub_st; + stack_pointer[-3] = ls; + stack_pointer[-2] = ss; + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(list_st); Py_DECREF(old_value); stack_pointer = _PyFrame_GetStackPointer(frame); } + // _POP_TOP_INT + { + value = ss; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + // _POP_TOP + { + value = ls; + stack_pointer += -2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 071f4ca31ba7..66aecf7ef543 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -103,6 +103,12 @@ dummy_func(void) { GETLOCAL(oparg) = value; } + op(_STORE_SUBSCR_LIST_INT, (value, list_st, sub_st -- ls, ss)) { + (void)value; + ls = list_st; + ss = sub_st; + } + op(_PUSH_NULL, (-- res)) { res = sym_new_null(ctx); } @@ -529,6 +535,12 @@ dummy_func(void) { } } + op(_POP_TOP_INT, (value --)) { + if (PyJitRef_IsBorrowed(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + } + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = bottom; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 5c5efdaa1fd8..1a3d4ad50bd8 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -141,6 +141,11 @@ } case _POP_TOP_INT: { + JitOptRef value; + value = stack_pointer[-1]; + if (PyJitRef_IsBorrowed(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } CHECK_STACK_BOUNDS(-1); stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); @@ -1123,8 +1128,21 @@ } case _STORE_SUBSCR_LIST_INT: { - CHECK_STACK_BOUNDS(-3); - stack_pointer += -3; + JitOptRef sub_st; + JitOptRef list_st; + JitOptRef value; + JitOptRef ls; + JitOptRef ss; + sub_st = stack_pointer[-1]; + list_st = stack_pointer[-2]; + value = stack_pointer[-3]; + (void)value; + ls = list_st; + ss = sub_st; + CHECK_STACK_BOUNDS(-1); + stack_pointer[-3] = ls; + stack_pointer[-2] = ss; + stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; }