From abaaeee6a0abe96ed4c4cfb22018e8b871a67102 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 16 Dec 2025 01:42:32 +0900 Subject: [PATCH] gh-134584: Eliminate redundant refcounting from _STORE_SUBSCR_DICT (GH-142712) Co-authored-by: Ken Jin --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 6 +++--- Lib/test/test_capi/test_opt.py | 19 +++++++++++++++++++ Python/bytecodes.c | 12 ++++++++---- Python/executor_cases.c.h | 20 ++++++++++++-------- Python/generated_cases.c.h | 19 +++++++++++++++---- Python/optimizer_bytecodes.c | 5 +++++ Python/optimizer_cases.c.h | 12 ++++++++++-- 9 files changed, 74 insertions(+), 23 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 9708ce817a1d..fda528065614 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1493,7 +1493,7 @@ _PyOpcode_macro_expansion[256] = { [STORE_NAME] = { .nuops = 1, .uops = { { _STORE_NAME, OPARG_SIMPLE, 0 } } }, [STORE_SLICE] = { .nuops = 1, .uops = { { _STORE_SLICE, OPARG_SIMPLE, 0 } } }, [STORE_SUBSCR] = { .nuops = 1, .uops = { { _STORE_SUBSCR, OPARG_SIMPLE, 0 } } }, - [STORE_SUBSCR_DICT] = { .nuops = 2, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_DICT, OPARG_SIMPLE, 1 } } }, + [STORE_SUBSCR_DICT] = { .nuops = 3, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_DICT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, [STORE_SUBSCR_LIST_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 0 }, { _STORE_SUBSCR_LIST_INT, OPARG_SIMPLE, 1 }, { _POP_TOP_INT, OPARG_SIMPLE, 1 }, { _POP_TOP, OPARG_SIMPLE, 1 } } }, [SWAP] = { .nuops = 1, .uops = { { _SWAP, OPARG_SIMPLE, 0 } } }, [TO_BOOL] = { .nuops = 1, .uops = { { _TO_BOOL, OPARG_SIMPLE, 2 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index febbb54b6073..42793d940b51 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -1070,7 +1070,7 @@ extern "C" { #define _STORE_NAME_r10 1263 #define _STORE_SLICE_r30 1264 #define _STORE_SUBSCR_r30 1265 -#define _STORE_SUBSCR_DICT_r30 1266 +#define _STORE_SUBSCR_DICT_r31 1266 #define _STORE_SUBSCR_LIST_INT_r32 1267 #define _SWAP_r11 1268 #define _SWAP_2_r02 1269 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 7c774e747de8..ec47c526ff12 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -1274,7 +1274,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 }, - { 0, 0, _STORE_SUBSCR_DICT_r30 }, + { 1, 0, _STORE_SUBSCR_DICT_r31 }, }, }, [_DELETE_SUBSCR] = { @@ -3499,7 +3499,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_SET_ADD_r10] = _SET_ADD, [_STORE_SUBSCR_r30] = _STORE_SUBSCR, [_STORE_SUBSCR_LIST_INT_r32] = _STORE_SUBSCR_LIST_INT, - [_STORE_SUBSCR_DICT_r30] = _STORE_SUBSCR_DICT, + [_STORE_SUBSCR_DICT_r31] = _STORE_SUBSCR_DICT, [_DELETE_SUBSCR_r20] = _DELETE_SUBSCR, [_CALL_INTRINSIC_1_r11] = _CALL_INTRINSIC_1, [_CALL_INTRINSIC_2_r21] = _CALL_INTRINSIC_2, @@ -4873,7 +4873,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_STORE_SUBSCR] = "_STORE_SUBSCR", [_STORE_SUBSCR_r30] = "_STORE_SUBSCR_r30", [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT", - [_STORE_SUBSCR_DICT_r30] = "_STORE_SUBSCR_DICT_r30", + [_STORE_SUBSCR_DICT_r31] = "_STORE_SUBSCR_DICT_r31", [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT", [_STORE_SUBSCR_LIST_INT_r32] = "_STORE_SUBSCR_LIST_INT_r32", [_SWAP] = "_SWAP", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0063c48b0b95..e17367ca71ea 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2525,10 +2525,29 @@ class TestUopsOptimization(unittest.TestCase): self.assertEqual(res, 10) self.assertIsNotNone(ex) uops = get_opnames(ex) + self.assertIn("_STORE_SUBSCR_LIST_INT", uops) self.assertNotIn("_POP_TOP", uops) self.assertNotIn("_POP_TOP_INT", uops) self.assertIn("_POP_TOP_NOP", uops) + def test_store_susbscr_dict(self): + def testfunc(n): + d = {} + for _ in range(n): + d['a'] = 1 + d['b'] = 2 + d['c'] = 3 + d['d'] = 4 + return sum(d.values()) + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 10) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_STORE_SUBSCR_DICT", uops) + self.assertNotIn("_POP_TOP", uops) + self.assertIn("_POP_TOP_NOP", uops) + def test_attr_promotion_failure(self): # We're not testing for any specific uops here, just # testing it doesn't crash. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ba94a4c57b14..3e6147961f18 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1158,9 +1158,9 @@ dummy_func( } macro(STORE_SUBSCR_DICT) = - _GUARD_NOS_DICT + unused/1 + _STORE_SUBSCR_DICT; + _GUARD_NOS_DICT + unused/1 + _STORE_SUBSCR_DICT + POP_TOP; - op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- )) { + op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st)) { PyObject *dict = PyStackRef_AsPyObjectBorrow(dict_st); assert(PyDict_CheckExact(dict)); @@ -1168,8 +1168,12 @@ dummy_func( int err = _PyDict_SetItem_Take2((PyDictObject *)dict, PyStackRef_AsPyObjectSteal(sub), PyStackRef_AsPyObjectSteal(value)); - PyStackRef_CLOSE(dict_st); - ERROR_IF(err); + if (err) { + PyStackRef_CLOSE(dict_st); + ERROR_IF(1); + } + DEAD(dict_st); + st = dict_st; } inst(DELETE_SUBSCR, (container, sub --)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 41b72ff56256..e7f6bb2ed0ce 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5765,12 +5765,13 @@ break; } - case _STORE_SUBSCR_DICT_r30: { + case _STORE_SUBSCR_DICT_r31: { CHECK_CURRENT_CACHED_VALUES(3); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); _PyStackRef sub; _PyStackRef dict_st; _PyStackRef value; + _PyStackRef st; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; _PyStackRef _stack_item_2 = _tos_cache2; @@ -5790,19 +5791,22 @@ PyStackRef_AsPyObjectSteal(sub), PyStackRef_AsPyObjectSteal(value)); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(dict_st); - stack_pointer = _PyFrame_GetStackPointer(frame); if (err) { + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(dict_st); + stack_pointer = _PyFrame_GetStackPointer(frame); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } - _tos_cache0 = PyStackRef_ZERO_BITS; + st = dict_st; + _tos_cache0 = st; _tos_cache1 = PyStackRef_ZERO_BITS; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(0); + SET_CURRENT_CACHED_VALUES(1); + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_WITH_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 006c06b67732..9c828cba877a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -10987,6 +10987,7 @@ _PyStackRef value; _PyStackRef dict_st; _PyStackRef sub; + _PyStackRef st; // _GUARD_NOS_DICT { nos = stack_pointer[-2]; @@ -11011,14 +11012,24 @@ PyStackRef_AsPyObjectSteal(sub), PyStackRef_AsPyObjectSteal(value)); stack_pointer = _PyFrame_GetStackPointer(frame); + if (err) { + stack_pointer += -3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(dict_st); + stack_pointer = _PyFrame_GetStackPointer(frame); + JUMP_TO_LABEL(error); + } + st = dict_st; + } + // _POP_TOP + { + value = st; stack_pointer += -3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(dict_st); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); - if (err) { - JUMP_TO_LABEL(error); - } } DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index faed3e81cbc5..5023f84213b3 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -109,6 +109,11 @@ dummy_func(void) { ss = sub_st; } + op(_STORE_SUBSCR_DICT, (value, dict_st, sub -- st)) { + (void)value; + st = dict_st; + } + op(_PUSH_NULL, (-- res)) { res = sym_new_null(ctx); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index a833367aea64..72564ea32db7 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1148,8 +1148,16 @@ } case _STORE_SUBSCR_DICT: { - CHECK_STACK_BOUNDS(-3); - stack_pointer += -3; + JitOptRef dict_st; + JitOptRef value; + JitOptRef st; + dict_st = stack_pointer[-2]; + value = stack_pointer[-3]; + (void)value; + st = dict_st; + CHECK_STACK_BOUNDS(-2); + stack_pointer[-3] = st; + stack_pointer += -2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } -- 2.47.3