git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-115999: Enable BINARY_SUBSCR_GETITEM for free-threaded build (gh-127737)
author: Donghee Na <donghee.na@python.org>
Thu, 19 Dec 2024 02:08:17 +0000 (11:08 +0900)
committer: GitHub <noreply@github.com>
Thu, 19 Dec 2024 02:08:17 +0000 (11:08 +0900)
12 files changed:
Include/internal/pycore_opcode_metadata.h
Include/internal/pycore_typeobject.h
Include/internal/pycore_uop_metadata.h
Lib/test/test_opcache.py
Objects/typeobject.c
Programs/test_frozenmain.h
Python/bytecodes.c
Python/executor_cases.c.h
Python/generated_cases.c.h
Python/optimizer_bytecodes.c
Python/optimizer_cases.c.h
Python/specialize.c

index 28aa1120414337031b1e739cc5b18bf425854652..d2ae8928a8fe8f3d23e223f2cfba6a7455312698 100644 (file)
@@ -994,7 +994,7 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect)  {
             return 0;
         }
         case BINARY_SUBSCR: {
-            *effect = 0;
+            *effect = 1;
             return 0;
         }
         case BINARY_SUBSCR_DICT: {
@@ -1002,7 +1002,7 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect)  {
             return 0;
         }
         case BINARY_SUBSCR_GETITEM: {
-            *effect = 0;
+            *effect = 1;
             return 0;
         }
         case BINARY_SUBSCR_LIST_INT: {
index 7b39d07f976ee32d5b0bd7cf42c7d5b4d4b29510..581153344a8e0541451ef7a5e8a366ac9231de05 100644 (file)
@@ -278,6 +278,7 @@ typedef int (*_py_validate_type)(PyTypeObject *);
 // and if the validation is passed, it will set the ``tp_version`` as valid
 // tp_version_tag from the ``ty``.
 extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version);
+extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version);
 
 #ifdef __cplusplus
 }
index dd775d3f7d3cdd33e7f249e0c0554f753e95f8cb..eadfda472a72701e1e479de14a126e7076c274e9 100644 (file)
@@ -722,7 +722,7 @@ int _PyUop_num_popped(int opcode, int oparg)
         case _BINARY_SUBSCR_CHECK_FUNC:
             return 0;
         case _BINARY_SUBSCR_INIT_CALL:
-            return 2;
+            return 3;
         case _LIST_APPEND:
             return 1;
         case _SET_ADD:
index 0a7557adc4763bc3c0a379421a50d28e028ac43e..94709e2022550a989c9e916fbdd1a9db491c3fdf 100644 (file)
@@ -1069,7 +1069,7 @@ class TestRacesDoNotCrash(TestBase):
         opname = "STORE_SUBSCR_LIST_INT"
         self.assert_races_do_not_crash(opname, get_items, read, write)
 
-    @requires_specialization
+    @requires_specialization_ft
     def test_unpack_sequence_list(self):
         def get_items():
             items = []
@@ -1245,6 +1245,14 @@ class TestInstanceDict(unittest.TestCase):
         f(test_obj, 1)
         self.assertEqual(test_obj.b, 0)
 
+# gh-127274: BINARY_SUBSCR_GETITEM will only cache __getitem__ methods that
+# are deferred. We only defer functions defined at the top-level.
+class CGetItem:
+    def __init__(self, val):
+        self.val = val
+    def __getitem__(self, item):
+        return self.val
+
 
 class TestSpecializer(TestBase):
 
@@ -1520,6 +1528,15 @@ class TestSpecializer(TestBase):
         self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT")
         self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR")
 
+        def binary_subscr_getitems():
+            items = [CGetItem(i) for i in range(100)]
+            for i in range(100):
+                self.assertEqual(items[i][i], i)
+
+        binary_subscr_getitems()
+        self.assert_specialized(binary_subscr_getitems, "BINARY_SUBSCR_GETITEM")
+        self.assert_no_opcode(binary_subscr_getitems, "BINARY_SUBSCR")
+
 
 if __name__ == "__main__":
     unittest.main()
index 2068d6aa9be52b608c304df742fb2b40e0e57e4b..7f95b519561e68835fc552f63f648be5fb3da5e7 100644 (file)
@@ -5679,6 +5679,31 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init,
     return can_cache;
 }
 
+int
+_PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version)
+{
+    if (!descriptor || !tp_version) {
+        return 0;
+    }
+    int can_cache;
+    BEGIN_TYPE_LOCK();
+    can_cache = ((PyTypeObject*)ht)->tp_version_tag == tp_version;
+    // This pointer is invalidated by PyType_Modified (see the comment on
+    // struct _specialization_cache):
+    PyFunctionObject *func = (PyFunctionObject *)descriptor;
+    uint32_t version = _PyFunction_GetVersionForCurrentState(func);
+    can_cache = can_cache && _PyFunction_IsVersionValid(version);
+#ifdef Py_GIL_DISABLED
+    can_cache = can_cache && _PyObject_HasDeferredRefcount(descriptor);
+#endif
+    if (can_cache) {
+        FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor);
+        FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version);
+    }
+    END_TYPE_LOCK();
+    return can_cache;
+}
+
 static void
 set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags)
 {
index c936622c020e3cca2b8cc23058233a5b59b021bf..99b0fa48e01c8b100fe1b9c2e5c822c9e2b83d67 100644 (file)
@@ -1,6 +1,6 @@
 // Auto-generated by Programs/freeze_test_frozenmain.py
 unsigned char M_test_frozenmain[] = {
-    227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,
+    227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,
     0,0,0,0,0,243,168,0,0,0,149,0,89,0,79,0,
     70,0,111,0,89,0,79,0,70,1,111,1,88,2,31,0,
     79,1,49,1,0,0,0,0,0,0,29,0,88,2,31,0,
index 772b46d17ec19835ac3b9c60b1d181689d4cc0f8..b67264f04408698056d856b0f581205a031526aa 100644 (file)
@@ -865,26 +865,24 @@ dummy_func(
             res = PyStackRef_FromPyObjectSteal(res_o);
         }
 
-        op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) {
+        op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused, getitem)) {
             PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
             DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE));
             PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
-            PyObject *getitem = ht->_spec_cache.getitem;
-            DEOPT_IF(getitem == NULL);
-            assert(PyFunction_Check(getitem));
-            uint32_t cached_version = ht->_spec_cache.getitem_version;
-            DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version);
-            PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem);
+            PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem);
+            DEOPT_IF(getitem_o == NULL);
+            assert(PyFunction_Check(getitem_o));
+            uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version);
+            DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version);
+            PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o);
             assert(code->co_argcount == 2);
             DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
+            getitem = PyStackRef_FromPyObjectNew(getitem_o);
             STAT_INC(BINARY_SUBSCR, hit);
         }
 
-        op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) {
-            PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
-            PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
-            PyObject *getitem = ht->_spec_cache.getitem;
-            new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame);
+        op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _PyInterpreterFrame* )) {
+            new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame);
             new_frame->localsplus[0] = container;
             new_frame->localsplus[1] = sub;
             INPUTS_DEAD();
index 55e9c3aa2db64d87491f0983669f14fea83178fb..de61a64a6e337413d6fafdc97f05533c00d21ec8 100644 (file)
 
         case _BINARY_SUBSCR_CHECK_FUNC: {
             _PyStackRef container;
+            _PyStackRef getitem;
             container = stack_pointer[-2];
             PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
             if (!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)) {
                 JUMP_TO_JUMP_TARGET();
             }
             PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
-            PyObject *getitem = ht->_spec_cache.getitem;
-            if (getitem == NULL) {
+            PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem);
+            if (getitem_o == NULL) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
-            assert(PyFunction_Check(getitem));
-            uint32_t cached_version = ht->_spec_cache.getitem_version;
-            if (((PyFunctionObject *)getitem)->func_version != cached_version) {
+            assert(PyFunction_Check(getitem_o));
+            uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version);
+            if (((PyFunctionObject *)getitem_o)->func_version != cached_version) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
-            PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem);
+            PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o);
             assert(code->co_argcount == 2);
             if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
+            getitem = PyStackRef_FromPyObjectNew(getitem_o);
             STAT_INC(BINARY_SUBSCR, hit);
+            stack_pointer[0] = getitem;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
         case _BINARY_SUBSCR_INIT_CALL: {
+            _PyStackRef getitem;
             _PyStackRef sub;
             _PyStackRef container;
             _PyInterpreterFrame *new_frame;
-            sub = stack_pointer[-1];
-            container = stack_pointer[-2];
-            PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
-            PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
-            PyObject *getitem = ht->_spec_cache.getitem;
-            new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame);
+            getitem = stack_pointer[-1];
+            sub = stack_pointer[-2];
+            container = stack_pointer[-3];
+            new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame);
             new_frame->localsplus[0] = container;
             new_frame->localsplus[1] = sub;
             frame->return_offset = 2 ;
-            stack_pointer[-2].bits = (uintptr_t)new_frame;
-            stack_pointer += -1;
+            stack_pointer[-3].bits = (uintptr_t)new_frame;
+            stack_pointer += -2;
             assert(WITHIN_STACK_BOUNDS());
             break;
         }
index 94343f953221eb052b2c20b0dac20b30e5cf78ae..8a89ba890fd9c9fb36417fd4de9ef38edd72b0f7 100644 (file)
             INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM);
             static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
             _PyStackRef container;
+            _PyStackRef getitem;
             _PyStackRef sub;
             _PyInterpreterFrame *new_frame;
             /* Skip 1 cache entry */
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
                 DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR);
                 PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
-                PyObject *getitem = ht->_spec_cache.getitem;
-                DEOPT_IF(getitem == NULL, BINARY_SUBSCR);
-                assert(PyFunction_Check(getitem));
-                uint32_t cached_version = ht->_spec_cache.getitem_version;
-                DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR);
-                PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem);
+                PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem);
+                DEOPT_IF(getitem_o == NULL, BINARY_SUBSCR);
+                assert(PyFunction_Check(getitem_o));
+                uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version);
+                DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version, BINARY_SUBSCR);
+                PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o);
                 assert(code->co_argcount == 2);
                 DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR);
+                getitem = PyStackRef_FromPyObjectNew(getitem_o);
                 STAT_INC(BINARY_SUBSCR, hit);
             }
             // _BINARY_SUBSCR_INIT_CALL
             {
                 sub = stack_pointer[-1];
-                PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container));
-                PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
-                PyObject *getitem = ht->_spec_cache.getitem;
-                new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame);
+                new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame);
                 new_frame->localsplus[0] = container;
                 new_frame->localsplus[1] = sub;
                 frame->return_offset = 2 ;
index 0b8aff02367e314b0fcaf8b7073155132cf7a2dd..e60c0d38425bfe5536fff10ba7f4b97d1c36b0a0 100644 (file)
@@ -349,9 +349,10 @@ dummy_func(void) {
         GETLOCAL(this_instr->operand0) = res;
     }
 
-    op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _Py_UOpsAbstractFrame *)) {
+    op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem  -- new_frame: _Py_UOpsAbstractFrame *)) {
         (void)container;
         (void)sub;
+        (void)getitem;
         new_frame = NULL;
         ctx->done = true;
     }
index f4fbe8c8aa0480e2c6e11ee09517676ace9c8f8f..33b34d6fa0d3f90701a31f174cf2b8445cf1dc7a 100644 (file)
         }
 
         case _BINARY_SUBSCR_CHECK_FUNC: {
+            _Py_UopsSymbol *getitem;
+            getitem = sym_new_not_null(ctx);
+            stack_pointer[0] = getitem;
+            stack_pointer += 1;
+            assert(WITHIN_STACK_BOUNDS());
             break;
         }
 
         case _BINARY_SUBSCR_INIT_CALL: {
+            _Py_UopsSymbol *getitem;
             _Py_UopsSymbol *sub;
             _Py_UopsSymbol *container;
             _Py_UOpsAbstractFrame *new_frame;
-            sub = stack_pointer[-1];
-            container = stack_pointer[-2];
+            getitem = stack_pointer[-1];
+            sub = stack_pointer[-2];
+            container = stack_pointer[-3];
             (void)container;
             (void)sub;
+            (void)getitem;
             new_frame = NULL;
             ctx->done = true;
-            stack_pointer[-2] = (_Py_UopsSymbol *)new_frame;
-            stack_pointer += -1;
+            stack_pointer[-3] = (_Py_UopsSymbol *)new_frame;
+            stack_pointer += -2;
             assert(WITHIN_STACK_BOUNDS());
             break;
         }
index 6eb298217ec2d36de1ceda7efbaab4ef611ec07c..6c45320f95db8e7f3c2fa590a463d3264eb5a61c 100644 (file)
@@ -1096,6 +1096,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na
                 SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_METHOD);
                 return -1;
             }
+            /* Don't specialize if PEP 523 is active */
             if (_PyInterpreterState_GET()->eval_frame) {
                 SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER);
                 return -1;
@@ -1165,6 +1166,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na
             if (version == 0) {
                 return -1;
             }
+            /* Don't specialize if PEP 523 is active */
             if (_PyInterpreterState_GET()->eval_frame) {
                 SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER);
                 return -1;
@@ -1781,12 +1783,12 @@ _Py_Specialize_BinarySubscr(
         specialized_op = BINARY_SUBSCR_DICT;
         goto success;
     }
-#ifndef Py_GIL_DISABLED
-    PyTypeObject *cls = Py_TYPE(container);
-    PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
+    unsigned int tp_version;
+    PyObject *descriptor = _PyType_LookupRefAndVersion(container_type, &_Py_ID(__getitem__), &tp_version);
     if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
         if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
             SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE);
+            Py_DECREF(descriptor);
             goto fail;
         }
         PyFunctionObject *func = (PyFunctionObject *)descriptor;
@@ -1794,30 +1796,29 @@ _Py_Specialize_BinarySubscr(
         int kind = function_kind(fcode);
         if (kind != SIMPLE_FUNCTION) {
             SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
+            Py_DECREF(descriptor);
             goto fail;
         }
         if (fcode->co_argcount != 2) {
             SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
+            Py_DECREF(descriptor);
             goto fail;
         }
-        uint32_t version = _PyFunction_GetVersionForCurrentState(func);
-        if (!_PyFunction_IsVersionValid(version)) {
-            SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
-            goto fail;
-        }
+
+        PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type;
+        /* Don't specialize if PEP 523 is active */
         if (_PyInterpreterState_GET()->eval_frame) {
             SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OTHER);
+            Py_DECREF(descriptor);
             goto fail;
         }
-        PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type;
-        // This pointer is invalidated by PyType_Modified (see the comment on
-        // struct _specialization_cache):
-        ht->_spec_cache.getitem = descriptor;
-        ht->_spec_cache.getitem_version = version;
-        specialized_op = BINARY_SUBSCR_GETITEM;
-        goto success;
+        if (_PyType_CacheGetItemForSpecialization(ht, descriptor, (uint32_t)tp_version)) {
+            specialized_op = BINARY_SUBSCR_GETITEM;
+            Py_DECREF(descriptor);
+            goto success;
+        }
     }
-#endif   // Py_GIL_DISABLED
+    Py_XDECREF(descriptor);
     SPECIALIZATION_FAIL(BINARY_SUBSCR,
                         binary_subscr_fail_kind(container_type, sub));
 fail:
@@ -2617,6 +2618,7 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg)
         assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR  ||
             instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR
         );
+        /* Don't specialize if PEP 523 is active */
         if (_PyInterpreterState_GET()->eval_frame) {
             SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER);
             goto failure;
@@ -2645,6 +2647,7 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr)
     assert(_PyOpcode_Caches[SEND] == INLINE_CACHE_ENTRIES_SEND);
     PyTypeObject *tp = Py_TYPE(receiver);
     if (tp == &PyGen_Type || tp == &PyCoro_Type) {
+        /* Don't specialize if PEP 523 is active */
         if (_PyInterpreterState_GET()->eval_frame) {
             SPECIALIZATION_FAIL(SEND, SPEC_FAIL_OTHER);
             goto failure;