]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-148276: Optimize object creation and method calls in the JIT by resolving __init__...
authorPieter Eendebak <pieter.eendebak@gmail.com>
Sat, 11 Apr 2026 14:22:42 +0000 (16:22 +0200)
committerGitHub <noreply@github.com>
Sat, 11 Apr 2026 14:22:42 +0000 (22:22 +0800)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Ken Jin <kenjin4096@gmail.com>
Include/internal/pycore_opcode_metadata.h
Lib/test/test_capi/test_opt.py
Python/bytecodes.c
Python/optimizer_bytecodes.c
Python/optimizer_cases.c.h
Python/record_functions.c.h

index 6d397af38a8f4d42e90190cb6870de1775f7991e..9bd78f67c6e2d476d6cf860000b6307966ba045d 100644 (file)
@@ -1289,7 +1289,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = {
     [SET_FUNCTION_ATTRIBUTE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG },
     [SET_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG },
     [STORE_ATTR] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
-    [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
+    [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG },
     [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG },
     [STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG },
     [STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG },
@@ -1501,7 +1501,7 @@ _PyOpcode_macro_expansion[256] = {
     [SET_FUNCTION_ATTRIBUTE] = { .nuops = 1, .uops = { { _SET_FUNCTION_ATTRIBUTE, OPARG_SIMPLE, 0 } } },
     [SET_UPDATE] = { .nuops = 2, .uops = { { _SET_UPDATE, OPARG_SIMPLE, 0 }, { _POP_TOP, OPARG_SIMPLE, 0 } } },
     [STORE_ATTR] = { .nuops = 1, .uops = { { _STORE_ATTR, OPARG_SIMPLE, 3 } } },
-    [STORE_ATTR_INSTANCE_VALUE] = { .nuops = 5, .uops = { { _LOCK_OBJECT, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION_LOCKED, 2, 1 }, { _GUARD_DORV_NO_DICT, OPARG_SIMPLE, 3 }, { _STORE_ATTR_INSTANCE_VALUE, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 } } },
+    [STORE_ATTR_INSTANCE_VALUE] = { .nuops = 6, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _LOCK_OBJECT, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION_LOCKED, 2, 1 }, { _GUARD_DORV_NO_DICT, OPARG_SIMPLE, 3 }, { _STORE_ATTR_INSTANCE_VALUE, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 } } },
     [STORE_ATTR_SLOT] = { .nuops = 4, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_SLOT, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 } } },
     [STORE_ATTR_WITH_HINT] = { .nuops = 4, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_WITH_HINT, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 } } },
     [STORE_DEREF] = { .nuops = 1, .uops = { { _STORE_DEREF, OPARG_SIMPLE, 0 } } },
index 695184026c54d4519857db64536a85dadf2b49b8..6d8ad8798de5e2964c47e3490663e9c09879443f 100644 (file)
@@ -1521,6 +1521,29 @@ class TestUopsOptimization(unittest.TestCase):
         Foo.attr = 0
         self.assertFalse(ex.is_valid())
 
+    def test_guard_type_version_locked_removed(self):
+        """
+        Verify that redundant _GUARD_TYPE_VERSION_LOCKED guards are
+        eliminated for sequential STORE_ATTR_INSTANCE_VALUE in __init__.
+        """
+
+        class Foo:
+            def __init__(self):
+                self.a = 1
+                self.b = 2
+                self.c = 3
+
+        def thing(n):
+            for _ in range(n):
+                Foo()
+
+        res, ex = self._run_with_optimizer(thing, TIER2_THRESHOLD)
+        self.assertIsNotNone(ex)
+        opnames = list(iter_opnames(ex))
+        guard_locked_count = opnames.count("_GUARD_TYPE_VERSION_LOCKED")
+        # Only the first store needs the guard; the rest should be NOPed.
+        self.assertEqual(guard_locked_count, 1)
+
     def test_type_version_doesnt_segfault(self):
         """
         Tests that setting a type version doesn't cause a segfault when later looking at the stack.
@@ -1542,6 +1565,98 @@ class TestUopsOptimization(unittest.TestCase):
 
         fn(A())
 
+    def test_init_resolves_callable(self):
+        """
+        _CHECK_AND_ALLOCATE_OBJECT should resolve __init__ to a constant,
+        enabling the optimizer to propagate type information through the frame
+        and eliminate redundant function version and arg count checks.
+        """
+        class MyPoint:
+            def __init__(self, x, y):
+                # If __init__ callable is propagated through, then
+                # These will get promoted from globals to constants.
+                self.x = range(1)
+                self.y = range(1)
+
+        def testfunc(n):
+            for _ in range(n):
+                p = MyPoint(1.0, 2.0)
+
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # The __init__ call should be traced through via _PUSH_FRAME
+        self.assertIn("_PUSH_FRAME", uops)
+        # __init__ resolution allows promotion of range to constant
+        self.assertNotIn("_LOAD_GLOBAL_BUILTINS", uops)
+
+    def test_guard_type_version_locked_propagates(self):
+        """
+        _GUARD_TYPE_VERSION_LOCKED should set the type version on the
+        symbol so repeated accesses to the same type can benefit.
+        """
+        class Item:
+            def __init__(self, val):
+                self.val = val
+
+            def get(self):
+                return self.val
+
+            def get2(self):
+                return self.val + 1
+
+        def testfunc(n):
+            item = Item(42)
+            total = 0
+            for _ in range(n):
+                # Two method calls on the same object — the second
+                # should benefit from type info set by the first.
+                total += item.get() + item.get2()
+            return total
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD * (42 + 43))
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # Both methods should be traced through
+        self.assertEqual(uops.count("_PUSH_FRAME"), 2)
+        # Type version propagation: one guard covers both method lookups
+        self.assertEqual(uops.count("_GUARD_TYPE_VERSION"), 1)
+        # Function checks eliminated (type info resolves the callable)
+        self.assertNotIn("_CHECK_FUNCTION_VERSION", uops)
+        self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)
+
+    def test_method_chain_guard_elimination(self):
+        """
+        Calling two methods on the same object should share the outer
+        type guard — only one _GUARD_TYPE_VERSION for the two lookups.
+        """
+        class Calc:
+            def __init__(self, val):
+                self.val = val
+
+            def add(self, x):
+                self.val += x
+                return self
+
+        def testfunc(n):
+            c = Calc(0)
+            for _ in range(n):
+                c.add(1).add(2)
+            return c.val
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD * 3)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # Both add() calls should be inlined
+        push_count = uops.count("_PUSH_FRAME")
+        self.assertEqual(push_count, 2)
+        # Only one outer type version guard for the two method lookups
+        # on the same object c (the second lookup reuses type info)
+        guard_version_count = uops.count("_GUARD_TYPE_VERSION")
+        self.assertEqual(guard_version_count, 1)
+
     def test_func_guards_removed_or_reduced(self):
         def testfunc(n):
             for i in range(n):
index 24ab7bc4fa02f8a1b5f826bc3cf376f74e0528de..34e3885a93c8bd92ae6b1d544594a7006ade4f00 100644 (file)
@@ -3011,6 +3011,7 @@ dummy_func(
 
         macro(STORE_ATTR_INSTANCE_VALUE) =
             unused/1 +
+            _RECORD_TOS_TYPE +
             _LOCK_OBJECT +
             _GUARD_TYPE_VERSION_LOCKED +
             _GUARD_DORV_NO_DICT +
index 3d58d5e09c4dfa6a60b42710d648fcccc667b20c..9b1a69649978c73e1f99c124a367ef6d9366f8a7 100644 (file)
@@ -134,6 +134,20 @@ dummy_func(void) {
         assert(!PyJitRef_IsUnique(value));
     }
 
+    op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) {
+        assert(type_version);
+        if (sym_matches_type_version(owner, type_version)) {
+            ADD_OP(_NOP, 0, 0);
+        } else {
+            PyTypeObject *probable_type = sym_get_probable_type(owner);
+            if (probable_type->tp_version_tag == type_version && sym_set_type_version(owner, type_version)) {
+                // Promote the probable type version to a known one.
+                PyType_Watch(TYPE_WATCHER_ID, (PyObject *)probable_type);
+                _Py_BloomFilter_Add(dependencies, probable_type);
+            }
+        }
+    }
+
     op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner -- o)) {
         (void)offset;
         (void)value;
@@ -1043,9 +1057,27 @@ dummy_func(void) {
     }
 
     op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
-        (void)type_version;
         (void)args;
-        callable = sym_new_not_null(ctx);
+        PyObject *probable_callable = sym_get_probable_value(callable);
+        assert(probable_callable != NULL);
+        assert(PyType_Check(probable_callable));
+        PyTypeObject *tp = (PyTypeObject *)probable_callable;
+        if (tp->tp_version_tag == type_version) {
+            // If the type version has not changed since we last saw it,
+            // then we know this __init__ is definitely the same one as in the cache.
+            // We can promote callable to a known constant. This does not need a
+            // type watcher, as we do not remove this _CHECK_AND_ALLOCATE_OBJECT guard.
+            // TODO: split up _CHECK_AND_ALLOCATE_OBJECT to the check then alloate, so we can
+            // eliminate the check.
+            PyHeapTypeObject *cls = (PyHeapTypeObject *)probable_callable;
+            PyObject *init = cls->_spec_cache.init;
+            assert(init != NULL);
+            assert(PyFunction_Check(init));
+            callable = sym_new_const(ctx, init);
+        }
+        else {
+            callable = sym_new_not_null(ctx);
+        }
         self_or_null = sym_new_not_null(ctx);
     }
 
index 869efd6ba30187b4e3d1c50ee5855f96686fa646..e5c67b6cf0e333c7756b61a6eb3126000d44081b 100644 (file)
         }
 
         case _GUARD_TYPE_VERSION_LOCKED: {
+            JitOptRef owner;
+            owner = stack_pointer[-1];
+            uint32_t type_version = (uint32_t)this_instr->operand0;
+            assert(type_version);
+            if (sym_matches_type_version(owner, type_version)) {
+                ADD_OP(_NOP, 0, 0);
+            } else {
+                PyTypeObject *probable_type = sym_get_probable_type(owner);
+                if (probable_type->tp_version_tag == type_version && sym_set_type_version(owner, type_version)) {
+                    PyType_Watch(TYPE_WATCHER_ID, (PyObject *)probable_type);
+                    _Py_BloomFilter_Add(dependencies, probable_type);
+                }
+            }
             break;
         }
 
             self_or_null = stack_pointer[-1 - oparg];
             callable = stack_pointer[-2 - oparg];
             uint32_t type_version = (uint32_t)this_instr->operand0;
-            (void)type_version;
             (void)args;
-            callable = sym_new_not_null(ctx);
+            PyObject *probable_callable = sym_get_probable_value(callable);
+            assert(probable_callable != NULL);
+            assert(PyType_Check(probable_callable));
+            PyTypeObject *tp = (PyTypeObject *)probable_callable;
+            if (tp->tp_version_tag == type_version) {
+                PyHeapTypeObject *cls = (PyHeapTypeObject *)probable_callable;
+                PyObject *init = cls->_spec_cache.init;
+                assert(init != NULL);
+                assert(PyFunction_Check(init));
+                callable = sym_new_const(ctx, init);
+            }
+            else {
+                callable = sym_new_not_null(ctx);
+            }
             self_or_null = sym_new_not_null(ctx);
             stack_pointer[-2 - oparg] = callable;
             stack_pointer[-1 - oparg] = self_or_null;
index 33fd2f19b1230d627d9dc10f99a027104b97eb51..02b8538bc902b56684b6b1cd20cb346573267879 100644 (file)
@@ -102,6 +102,7 @@ const uint8_t _PyOpcode_RecordFunctionIndices[256] = {
         [LOAD_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX,
         [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = _RECORD_TOS_TYPE_INDEX,
         [LOAD_ATTR_PROPERTY] = _RECORD_TOS_TYPE_INDEX,
+        [STORE_ATTR_INSTANCE_VALUE] = _RECORD_TOS_TYPE_INDEX,
         [STORE_ATTR_WITH_HINT] = _RECORD_TOS_TYPE_INDEX,
         [STORE_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX,
         [FOR_ITER_GEN] = _RECORD_NOS_GEN_FUNC_INDEX,