]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-119258: Eliminate Type Guards in Tier 2 Optimizer with Watcher (GH-119365)
authorSaul Shanabrook <s.shanabrook@gmail.com>
Sat, 8 Jun 2024 09:41:45 +0000 (05:41 -0400)
committerGitHub <noreply@github.com>
Sat, 8 Jun 2024 09:41:45 +0000 (17:41 +0800)
Co-authored-by: parmeggiani <parmeggiani@spaziodati.eu>
Co-authored-by: dpdani <git@danieleparmeggiani.me>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Brandt Bucher <brandtbucher@microsoft.com>
Co-authored-by: Ken Jin <kenjin@python.org>
13 files changed:
Include/internal/pycore_optimizer.h
Include/internal/pycore_typeobject.h
Lib/test/test_capi/test_opt.py
Lib/test/test_capi/test_watchers.py
Lib/test/test_type_cache.py
Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst [new file with mode: 0644]
Modules/_testcapimodule.c
Modules/_testinternalcapi.c
Objects/typeobject.c
Python/optimizer_analysis.c
Python/optimizer_bytecodes.c
Python/optimizer_cases.c.h
Python/optimizer_symbols.c

index 76123987ac99f569a54c36627a5610dba2591de7..fd7833fd23129937d91b53fc01875abb2c981c5d 100644 (file)
@@ -33,6 +33,7 @@ struct _Py_UopsSymbol {
     int flags;  // 0 bits: Top; 2 or more bits: Bottom
     PyTypeObject *typ;  // Borrowed reference
     PyObject *const_val;  // Owned reference (!)
+    unsigned int type_version; // currently stores type version
 };
 
 #define UOP_FORMAT_TARGET 0
@@ -123,9 +124,11 @@ extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *con
 extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx);
 extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym);
 extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ);
+extern bool _Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version);
 extern void _Py_uop_sym_set_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym);
 extern void _Py_uop_sym_set_non_null(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym);
 extern void _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *typ);
+extern bool _Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version);
 extern void _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val);
 extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym);
 extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym);
@@ -138,9 +141,9 @@ extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx);
 extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
     _Py_UOpsContext *ctx,
     PyCodeObject *co,
-    _Py_UopsSymbol **localsplus_start,
-    int n_locals_already_filled,
-    int curr_stackentries);
+    int curr_stackentries,
+    _Py_UopsSymbol **args,
+    int arg_len);
 extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx);
 
 PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
index 8664ae0e44533f6d2f355c5a04ca7658e10d120f..bc295b1b066bd14f8fa519ff98388c6e76cc173f 100644 (file)
@@ -63,6 +63,8 @@ typedef struct {
     PyObject *tp_weaklist;
 } managed_static_type_state;
 
+#define TYPE_VERSION_CACHE_SIZE (1<<12)  /* Must be a power of 2 */
+
 struct types_state {
     /* Used to set PyTypeObject.tp_version_tag.
        It starts at _Py_MAX_GLOBAL_TYPE_VERSION_TAG + 1,
@@ -118,6 +120,12 @@ struct types_state {
         managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES];
     } for_extensions;
     PyMutex mutex;
+
+    // Borrowed references to type objects whose
+    // tp_version_tag % TYPE_VERSION_CACHE_SIZE
+    // once was equal to the index in the table.
+    // They are cleared when the type object is deallocated.
+    PyTypeObject *type_version_cache[TYPE_VERSION_CACHE_SIZE];
 };
 
 
@@ -230,6 +238,9 @@ extern void _PyType_SetFlags(PyTypeObject *self, unsigned long mask,
 extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask,
                                       unsigned long flags);
 
+extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp);
+PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version);
+PyTypeObject *_PyType_LookupByVersion(unsigned int version);
 
 #ifdef __cplusplus
 }
index 0491ff9b84d486cac0d6f7c94cac698e3b155d08..fc6d8b0a3f01d2819551f79ec16976173744b4a7 100644 (file)
@@ -1333,6 +1333,153 @@ class TestUopsOptimization(unittest.TestCase):
         self.assertIs(type(s), float)
         self.assertEqual(s, 1024.0)
 
+    def test_guard_type_version_removed(self):
+        def thing(a):
+            x = 0
+            for _ in range(100):
+                x += a.attr
+                x += a.attr
+            return x
+
+        class Foo:
+            attr = 1
+
+        res, ex = self._run_with_optimizer(thing, Foo())
+        opnames = list(iter_opnames(ex))
+        self.assertIsNotNone(ex)
+        self.assertEqual(res, 200)
+        guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
+        self.assertEqual(guard_type_version_count, 1)
+
+    def test_guard_type_version_removed_inlined(self):
+        """
+        Verify that the guard type version is removed if we have an inlined function
+        """
+
+        def fn():
+            pass
+
+        def thing(a):
+            x = 0
+            for _ in range(100):
+                x += a.attr
+                fn()
+                x += a.attr
+            return x
+
+        class Foo:
+            attr = 1
+
+        res, ex = self._run_with_optimizer(thing, Foo())
+        opnames = list(iter_opnames(ex))
+        self.assertIsNotNone(ex)
+        self.assertEqual(res, 200)
+        guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
+        self.assertEqual(guard_type_version_count, 1)
+
+    def test_guard_type_version_not_removed(self):
+        """
+        Verify that the guard type version is not removed if we modify the class
+        """
+
+        def thing(a):
+            x = 0
+            for i in range(100):
+                x += a.attr
+                # for the first 90 iterations we set the attribute on this dummy function which shouldn't
+                # trigger the type watcher
+                # then after 90  it should trigger it and stop optimizing
+                # Note that the code needs to be in this weird form so it's optimized inline without any control flow
+                setattr((Foo, Bar)[i < 90], "attr", 2)
+                x += a.attr
+            return x
+
+        class Foo:
+            attr = 1
+
+        class Bar:
+            pass
+
+        res, ex = self._run_with_optimizer(thing, Foo())
+        opnames = list(iter_opnames(ex))
+
+        self.assertIsNotNone(ex)
+        self.assertEqual(res, 219)
+        guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
+        self.assertEqual(guard_type_version_count, 2)
+
+
+    @unittest.expectedFailure
+    def test_guard_type_version_not_removed_escaping(self):
+        """
+        Verify that the guard type version is not removed if we have an escaping function
+        """
+
+        def thing(a):
+            x = 0
+            for i in range(100):
+                x += a.attr
+                # eval should be escaping and so should cause optimization to stop and preserve both type versions
+                eval("None")
+                x += a.attr
+            return x
+
+        class Foo:
+            attr = 1
+        res, ex = self._run_with_optimizer(thing, Foo())
+        opnames = list(iter_opnames(ex))
+        self.assertIsNotNone(ex)
+        self.assertEqual(res, 200)
+        guard_type_version_count = opnames.count("_GUARD_TYPE_VERSION")
+        # Note: This will actually be 1 for now
+        # https://github.com/python/cpython/pull/119365#discussion_r1626220129
+        self.assertEqual(guard_type_version_count, 2)
+
+
+    def test_guard_type_version_executor_invalidated(self):
+        """
+        Verify that the executor is invalidated on a type change.
+        """
+
+        def thing(a):
+            x = 0
+            for i in range(100):
+                x += a.attr
+                x += a.attr
+            return x
+
+        class Foo:
+            attr = 1
+
+        res, ex = self._run_with_optimizer(thing, Foo())
+        self.assertEqual(res, 200)
+        self.assertIsNotNone(ex)
+        self.assertEqual(list(iter_opnames(ex)).count("_GUARD_TYPE_VERSION"), 1)
+        self.assertTrue(ex.is_valid())
+        Foo.attr = 0
+        self.assertFalse(ex.is_valid())
+
+    def test_type_version_doesnt_segfault(self):
+        """
+        Tests that setting a type version doesn't cause a segfault when later looking at the stack.
+        """
+
+        # Minimized from mdp.py benchmark
+
+        class A:
+            def __init__(self):
+                self.attr = {}
+
+            def method(self, arg):
+                self.attr[arg] = None
+
+        def fn(a):
+            for _ in range(100):
+                (_ for _ in [])
+                (_ for _ in [a.method(None)])
+
+        fn(A())
+
 
 if __name__ == "__main__":
     unittest.main()
index 90665a7561b3162306a608972dcfa77dd54955bc..709b5e1c4b716a1359bb92c5f0dd6e1046ac09e6 100644 (file)
@@ -282,8 +282,10 @@ class TestTypeWatchers(unittest.TestCase):
             self.watch(wid, C)
             with catch_unraisable_exception() as cm:
                 C.foo = "bar"
-                self.assertEqual(cm.unraisable.err_msg,
-                    f"Exception ignored in type watcher callback #0 for {C!r}")
+                self.assertEqual(
+                    cm.unraisable.err_msg,
+                    f"Exception ignored in type watcher callback #1 for {C!r}",
+                )
                 self.assertIs(cm.unraisable.object, None)
                 self.assertEqual(str(cm.unraisable.exc_value), "boom!")
             self.assert_events([])
index e90e315c8083618f870098fbea5e0719ecd4b607..edaf076707ad8b2f38c003a738e000f93f347c96 100644 (file)
@@ -10,8 +10,9 @@ except ImportError:
 
 # Skip this test if the _testcapi module isn't available.
 _testcapi = import_helper.import_module("_testcapi")
+_testinternalcapi = import_helper.import_module("_testinternalcapi")
 type_get_version = _testcapi.type_get_version
-type_assign_specific_version_unsafe = _testcapi.type_assign_specific_version_unsafe
+type_assign_specific_version_unsafe = _testinternalcapi.type_assign_specific_version_unsafe
 type_assign_version = _testcapi.type_assign_version
 type_modified = _testcapi.type_modified
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-23-20-17-37.gh-issue-119258.wZFIpt.rst
new file mode 100644 (file)
index 0000000..68f1ec1
--- /dev/null
@@ -0,0 +1,3 @@
+Eliminate type version guards in the tier two interpreter.\r
+\r
+Note that setting the ``tp_version_tag`` manually (which has never been supported) may result in crashes.
index b58c17260626c2ab96575d447d156bcccb8daf26..b139b46c826a3f8f867ca0a208a5bf179f9a3eeb 100644 (file)
@@ -2403,21 +2403,6 @@ type_modified(PyObject *self, PyObject *type)
     Py_RETURN_NONE;
 }
 
-// Circumvents standard version assignment machinery - use with caution and only on
-// short-lived heap types
-static PyObject *
-type_assign_specific_version_unsafe(PyObject *self, PyObject *args)
-{
-    PyTypeObject *type;
-    unsigned int version;
-    if (!PyArg_ParseTuple(args, "Oi:type_assign_specific_version_unsafe", &type, &version)) {
-        return NULL;
-    }
-    assert(!PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE));
-    type->tp_version_tag = version;
-    type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG;
-    Py_RETURN_NONE;
-}
 
 static PyObject *
 type_assign_version(PyObject *self, PyObject *type)
@@ -3427,8 +3412,6 @@ static PyMethodDef TestMethods[] = {
     {"test_py_is_funcs", test_py_is_funcs, METH_NOARGS},
     {"type_get_version", type_get_version, METH_O, PyDoc_STR("type->tp_version_tag")},
     {"type_modified", type_modified, METH_O, PyDoc_STR("PyType_Modified")},
-    {"type_assign_specific_version_unsafe", type_assign_specific_version_unsafe, METH_VARARGS,
-     PyDoc_STR("forcefully assign type->tp_version_tag")},
     {"type_assign_version", type_assign_version, METH_O, PyDoc_STR("PyUnstable_Type_AssignVersionTag")},
     {"type_get_tp_bases", type_get_tp_bases, METH_O},
     {"type_get_tp_mro", type_get_tp_mro, METH_O},
index 6d4a00c06ca9def474c56842769d17f7d0ada9b0..139a0509795de9b827a18e27a4a9c8f449d5ac01 100644 (file)
@@ -2002,6 +2002,22 @@ has_inline_values(PyObject *self, PyObject *obj)
 }
 
 
+// Circumvents standard version assignment machinery - use with caution and only on
+// short-lived heap types
+static PyObject *
+type_assign_specific_version_unsafe(PyObject *self, PyObject *args)
+{
+    PyTypeObject *type;
+    unsigned int version;
+    if (!PyArg_ParseTuple(args, "Oi:type_assign_specific_version_unsafe", &type, &version)) {
+        return NULL;
+    }
+    assert(!PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE));
+    _PyType_SetVersion(type, version);
+    type->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG;
+    Py_RETURN_NONE;
+}
+
 /*[clinic input]
 gh_119213_getargs
 
@@ -2102,6 +2118,9 @@ static PyMethodDef module_functions[] = {
     {"get_rare_event_counters", get_rare_event_counters, METH_NOARGS},
     {"reset_rare_event_counters", reset_rare_event_counters, METH_NOARGS},
     {"has_inline_values", has_inline_values, METH_O},
+    {"type_assign_specific_version_unsafe", type_assign_specific_version_unsafe, METH_VARARGS,
+     PyDoc_STR("forcefully assign type->tp_version_tag")},
+
 #ifdef Py_GIL_DISABLED
     {"py_thread_id", get_py_thread_id, METH_NOARGS},
 #endif
index 880ac6b9c009fef9b5b796c7027feec5d819d3e6..cd16bebd1e1cb84d03c343f3d074c394b3312a6d 100644 (file)
@@ -853,7 +853,8 @@ PyType_AddWatcher(PyType_WatchCallback callback)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
 
-    for (int i = 0; i < TYPE_MAX_WATCHERS; i++) {
+    // start at 1, 0 is reserved for cpython optimizer
+    for (int i = 1; i < TYPE_MAX_WATCHERS; i++) {
         if (!interp->type_watchers[i]) {
             interp->type_watchers[i] = callback;
             return i;
@@ -960,7 +961,7 @@ type_modification_starting_unlocked(PyTypeObject *type)
     }
 
     /* 0 is not a valid version tag */
-    _Py_atomic_store_uint32_release(&type->tp_version_tag, 0);
+    _PyType_SetVersion(type, 0);
 }
 
 #endif
@@ -1024,7 +1025,7 @@ type_modified_unlocked(PyTypeObject *type)
     }
 
     type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
-    FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, 0); /* 0 is not a valid version tag */
+    _PyType_SetVersion(type, 0); /* 0 is not a valid version tag */
     if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
         // This field *must* be invalidated if the type is modified (see the
         // comment on struct _specialization_cache):
@@ -1101,7 +1102,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
  clear:
     assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN));
     type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
-    FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag, 0); /* 0 is not a valid version tag */
+    _PyType_SetVersion(type, 0); /* 0 is not a valid version tag */
     if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
         // This field *must* be invalidated if the type is modified (see the
         // comment on struct _specialization_cache):
@@ -1109,6 +1110,64 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
     }
 }
 
+/*
+The Tier 2 interpreter requires looking up the type object by the type version, so it can install
+watchers to understand when they change.
+
+So we add a global cache from type version to borrowed references of type objects.
+
+This is similar to func_version_cache.
+*/
+
+void
+_PyType_SetVersion(PyTypeObject *tp, unsigned int version)
+{
+#ifndef Py_GIL_DISABLED
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    // lookup the old version and set to null
+    if (tp->tp_version_tag != 0) {
+        PyTypeObject **slot =
+            interp->types.type_version_cache
+            + (tp->tp_version_tag % TYPE_VERSION_CACHE_SIZE);
+        *slot = NULL;
+    }
+#endif
+    FT_ATOMIC_STORE_UINT32_RELAXED(tp->tp_version_tag, version);
+#ifndef Py_GIL_DISABLED
+    if (version != 0) {
+        PyTypeObject **slot =
+            interp->types.type_version_cache
+            + (version % TYPE_VERSION_CACHE_SIZE);
+        *slot = tp;
+    }
+#endif
+}
+
+PyTypeObject *
+_PyType_LookupByVersion(unsigned int version)
+{
+#ifdef Py_GIL_DISABLED
+    return NULL;
+#else
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    PyTypeObject **slot =
+        interp->types.type_version_cache
+        + (version % TYPE_VERSION_CACHE_SIZE);
+    if (*slot && (*slot)->tp_version_tag == version) {
+        return *slot;
+    }
+    return NULL;
+#endif
+}
+
+unsigned int
+_PyType_GetVersionForCurrentState(PyTypeObject *tp)
+{
+    return tp->tp_version_tag;
+}
+
+
+
 #define MAX_VERSIONS_PER_CLASS 1000
 
 static int
@@ -1137,8 +1196,7 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type)
             /* We have run out of version numbers */
             return 0;
         }
-        FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag,
-                                       NEXT_GLOBAL_VERSION_TAG++);
+        _PyType_SetVersion(type, NEXT_GLOBAL_VERSION_TAG++);
         assert (type->tp_version_tag <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG);
     }
     else {
@@ -1147,8 +1205,7 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type)
             /* We have run out of version numbers */
             return 0;
         }
-        FT_ATOMIC_STORE_UINT32_RELAXED(type->tp_version_tag,
-                                       NEXT_VERSION_TAG(interp)++);
+        _PyType_SetVersion(type, NEXT_VERSION_TAG(interp)++);
         assert (type->tp_version_tag != 0);
     }
 
@@ -5768,7 +5825,7 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type,
     if (final) {
         type->tp_flags &= ~Py_TPFLAGS_READY;
         type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
-        type->tp_version_tag = 0;
+        _PyType_SetVersion(type, 0);
     }
 
     _PyStaticType_ClearWeakRefs(interp, type);
@@ -5798,7 +5855,6 @@ type_dealloc(PyObject *self)
     _PyObject_ASSERT((PyObject *)type, type->tp_flags & Py_TPFLAGS_HEAPTYPE);
 
     _PyObject_GC_UNTRACK(type);
-
     type_dealloc_common(type);
 
     // PyObject_ClearWeakRefs() raises an exception if Py_REFCNT() != 0
@@ -8367,7 +8423,7 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self,
         self->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE;
 
         assert(NEXT_GLOBAL_VERSION_TAG <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG);
-        self->tp_version_tag = NEXT_GLOBAL_VERSION_TAG++;
+        _PyType_SetVersion(self, NEXT_GLOBAL_VERSION_TAG++);
         self->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG;
     }
     else {
index e5d3793bd4d2045a5dfe65f4fda3a13dc6e139c6..75d1d9f6b2a794b9716797562bb606c5bf5914e6 100644 (file)
@@ -79,6 +79,7 @@ increment_mutations(PyObject* dict) {
  * so we don't need to check that they haven't been used */
 #define BUILTINS_WATCHER_ID 0
 #define GLOBALS_WATCHER_ID  1
+#define TYPE_WATCHER_ID  0
 
 static int
 globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
@@ -92,6 +93,14 @@ globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict,
     return 0;
 }
 
+static int
+type_watcher_callback(PyTypeObject* type)
+{
+    _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), type, 1);
+    PyType_Unwatch(TYPE_WATCHER_ID, (PyObject *)type);
+    return 0;
+}
+
 static PyObject *
 convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj)
 {
@@ -167,6 +176,9 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
     if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) {
         interp->dict_state.watchers[GLOBALS_WATCHER_ID] = globals_watcher_callback;
     }
+    if (interp->type_watchers[TYPE_WATCHER_ID] == NULL) {
+        interp->type_watchers[TYPE_WATCHER_ID] = type_watcher_callback;
+    }
     for (int pc = 0; pc < buffer_size; pc++) {
         _PyUOpInstruction *inst = &buffer[pc];
         int opcode = inst->opcode;
@@ -310,9 +322,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
 #define sym_has_type _Py_uop_sym_has_type
 #define sym_get_type _Py_uop_sym_get_type
 #define sym_matches_type _Py_uop_sym_matches_type
+#define sym_matches_type_version _Py_uop_sym_matches_type_version
 #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM)
 #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM)
 #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
+#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
 #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
 #define sym_is_bottom _Py_uop_sym_is_bottom
 #define sym_truthiness _Py_uop_sym_truthiness
@@ -395,7 +409,7 @@ optimize_uops(
     _PyUOpInstruction *corresponding_check_stack = NULL;
 
     _Py_uop_abstractcontext_init(ctx);
-    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, ctx->n_consumed, 0, curr_stacklen);
+    _Py_UOpsAbstractFrame *frame = _Py_uop_frame_new(ctx, co, curr_stacklen, NULL, 0);
     if (frame == NULL) {
         return -1;
     }
index a2cb4c0b2c5192f2afb8f0497892a7d2b2fd3d14..e6fb85a90603ebf191180a68727d934c66de5773 100644 (file)
@@ -21,11 +21,13 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 #define sym_new_const _Py_uop_sym_new_const
 #define sym_new_null _Py_uop_sym_new_null
 #define sym_matches_type _Py_uop_sym_matches_type
+#define sym_matches_type_version _Py_uop_sym_matches_type_version
 #define sym_get_type _Py_uop_sym_get_type
 #define sym_has_type _Py_uop_sym_has_type
 #define sym_set_null(SYM) _Py_uop_sym_set_null(ctx, SYM)
 #define sym_set_non_null(SYM) _Py_uop_sym_set_non_null(ctx, SYM)
 #define sym_set_type(SYM, TYPE) _Py_uop_sym_set_type(ctx, SYM, TYPE)
+#define sym_set_type_version(SYM, VERSION) _Py_uop_sym_set_type_version(ctx, SYM, VERSION)
 #define sym_set_const(SYM, CNST) _Py_uop_sym_set_const(ctx, SYM, CNST)
 #define sym_is_bottom _Py_uop_sym_is_bottom
 #define frame_new _Py_uop_frame_new
@@ -113,6 +115,29 @@ dummy_func(void) {
         sym_set_type(right, &PyLong_Type);
     }
 
+    op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
+        assert(type_version);
+        if (sym_matches_type_version(owner, type_version)) {
+            REPLACE_OP(this_instr, _NOP, 0, 0);
+        } else {
+            // add watcher so that whenever the type changes we invalidate this
+            PyTypeObject *type = _PyType_LookupByVersion(type_version);
+            // if the type is null, it was not found in the cache (there was a conflict)
+            // with the key, in which case we can't trust the version
+            if (type) {
+                // if the type version was set properly, then add a watcher
+                // if it wasn't this means that the type version was previously set to something else
+                // and we set the owner to bottom, so we don't need to add a watcher because we must have
+                // already added one earlier.
+                if (sym_set_type_version(owner, type_version)) {
+                    PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
+                    _Py_BloomFilter_Add(dependencies, type);
+                }
+            }
+
+        }
+    }
+
     op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) {
         if (sym_matches_type(left, &PyFloat_Type)) {
             if (sym_matches_type(right, &PyFloat_Type)) {
@@ -563,16 +588,12 @@ dummy_func(void) {
             argcount++;
         }
 
-        _Py_UopsSymbol **localsplus_start = ctx->n_consumed;
-        int n_locals_already_filled = 0;
-        // Can determine statically, so we interleave the new locals
-        // and make the current stack the new locals.
-        // This also sets up for true call inlining.
         if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-            localsplus_start = args;
-            n_locals_already_filled = argcount;
+            new_frame = frame_new(ctx, co, 0, args, argcount);
+        } else {
+            new_frame = frame_new(ctx, co, 0, NULL, 0);
+
         }
-        new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
     }
 
     op(_PY_FRAME_GENERAL, (callable, self_or_null, args[oparg] -- new_frame: _Py_UOpsAbstractFrame *)) {
index b3787345ec6714df06df0008b7638902617abab9..18f3ca4cb73e5a31dfd101e4cca8e10d060fd6de 100644 (file)
         }
 
         case _GUARD_TYPE_VERSION: {
+            _Py_UopsSymbol *owner;
+            owner = stack_pointer[-1];
+            uint32_t type_version = (uint32_t)this_instr->operand;
+            assert(type_version);
+            if (sym_matches_type_version(owner, type_version)) {
+                REPLACE_OP(this_instr, _NOP, 0, 0);
+            } else {
+                // add watcher so that whenever the type changes we invalidate this
+                PyTypeObject *type = _PyType_LookupByVersion(type_version);
+                // if the type is null, it was not found in the cache (there was a conflict)
+                // with the key, in which case we can't trust the version
+                if (type) {
+                    // if the type version was set properly, then add a watcher
+                    // if it wasn't this means that the type version was previously set to something else
+                    // and we set the owner to bottom, so we don't need to add a watcher because we must have
+                    // already added one earlier.
+                    if (sym_set_type_version(owner, type_version)) {
+                        PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
+                        _Py_BloomFilter_Add(dependencies, type);
+                    }
+                }
+            }
             break;
         }
 
                 args--;
                 argcount++;
             }
-            _Py_UopsSymbol **localsplus_start = ctx->n_consumed;
-            int n_locals_already_filled = 0;
-            // Can determine statically, so we interleave the new locals
-            // and make the current stack the new locals.
-            // This also sets up for true call inlining.
             if (sym_is_null(self_or_null) || sym_is_not_null(self_or_null)) {
-                localsplus_start = args;
-                n_locals_already_filled = argcount;
+                new_frame = frame_new(ctx, co, 0, args, argcount);
+            } else {
+                new_frame = frame_new(ctx, co, 0, NULL, 0);
             }
-            new_frame = frame_new(ctx, co, localsplus_start, n_locals_already_filled, 0);
             stack_pointer[-2 - oparg] = (_Py_UopsSymbol *)new_frame;
             stack_pointer += -1 - oparg;
             break;
index e546eef306eeca082b9bf659609a9faaef716777..f3d4078bf1a890b8bb0298ea6c0e1f8e8df986ba 100644 (file)
@@ -52,7 +52,8 @@ static inline int get_lltrace(void) {
 static _Py_UopsSymbol NO_SPACE_SYMBOL = {
     .flags = IS_NULL | NOT_NULL | NO_SPACE,
     .typ = NULL,
-    .const_val = NULL
+    .const_val = NULL,
+    .type_version = 0,
 };
 
 _Py_UopsSymbol *
@@ -76,6 +77,7 @@ sym_new(_Py_UOpsContext *ctx)
     self->flags = 0;
     self->typ = NULL;
     self->const_val = NULL;
+    self->type_version = 0;
 
     return self;
 }
@@ -152,6 +154,18 @@ _Py_uop_sym_set_type(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyTypeObject *ty
     }
 }
 
+bool
+_Py_uop_sym_set_type_version(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, unsigned int version)
+{
+    // if the type version was already set, then it must be different and we should set it to bottom
+    if (sym->type_version) {
+        sym_set_bottom(ctx, sym);
+        return false;
+    }
+    sym->type_version = version;
+    return true;
+}
+
 void
 _Py_uop_sym_set_const(_Py_UOpsContext *ctx, _Py_UopsSymbol *sym, PyObject *const_val)
 {
@@ -256,6 +270,12 @@ _Py_uop_sym_get_type(_Py_UopsSymbol *sym)
     return sym->typ;
 }
 
+unsigned int
+_Py_uop_sym_get_type_version(_Py_UopsSymbol *sym)
+{
+    return sym->type_version;
+}
+
 bool
 _Py_uop_sym_has_type(_Py_UopsSymbol *sym)
 {
@@ -272,6 +292,13 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ)
     return _Py_uop_sym_get_type(sym) == typ;
 }
 
+bool
+_Py_uop_sym_matches_type_version(_Py_UopsSymbol *sym, unsigned int version)
+{
+    return _Py_uop_sym_get_type_version(sym) == version;
+}
+
+
 int
 _Py_uop_sym_truthiness(_Py_UopsSymbol *sym)
 {
@@ -311,9 +338,9 @@ _Py_UOpsAbstractFrame *
 _Py_uop_frame_new(
     _Py_UOpsContext *ctx,
     PyCodeObject *co,
-    _Py_UopsSymbol **localsplus_start,
-    int n_locals_already_filled,
-    int curr_stackentries)
+    int curr_stackentries,
+    _Py_UopsSymbol **args,
+    int arg_len)
 {
     assert(ctx->curr_frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
     _Py_UOpsAbstractFrame *frame = &ctx->frames[ctx->curr_frame_depth];
@@ -321,19 +348,22 @@ _Py_uop_frame_new(
     frame->stack_len = co->co_stacksize;
     frame->locals_len = co->co_nlocalsplus;
 
-    frame->locals = localsplus_start;
+    frame->locals = ctx->n_consumed;
     frame->stack = frame->locals + co->co_nlocalsplus;
     frame->stack_pointer = frame->stack + curr_stackentries;
-    ctx->n_consumed = localsplus_start + (co->co_nlocalsplus + co->co_stacksize);
+    ctx->n_consumed = ctx->n_consumed + (co->co_nlocalsplus + co->co_stacksize);
     if (ctx->n_consumed >= ctx->limit) {
         ctx->done = true;
         ctx->out_of_space = true;
         return NULL;
     }
 
-
     // Initialize with the initial state of all local variables
-    for (int i = n_locals_already_filled; i < co->co_nlocalsplus; i++) {
+    for (int i = 0; i < arg_len; i++) {
+        frame->locals[i] = args[i];
+    }
+
+    for (int i = arg_len; i < co->co_nlocalsplus; i++) {
         _Py_UopsSymbol *local = _Py_uop_sym_new_unknown(ctx);
         frame->locals[i] = local;
     }