]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-122417: Implement per-thread heap type refcounts (#122418)
author Sam Gross <colesbury@gmail.com>
Tue, 6 Aug 2024 18:36:57 +0000 (14:36 -0400)
committer GitHub <noreply@github.com>
Tue, 6 Aug 2024 18:36:57 +0000 (14:36 -0400)
The free-threaded build partially stores heap type reference counts in a
distributed manner in per-thread arrays. This avoids reference count
contention when creating or destroying instances.

Co-authored-by: Ken Jin <kenjin@python.org>
18 files changed:
Include/cpython/object.h
Include/internal/pycore_gc.h
Include/internal/pycore_interp.h
Include/internal/pycore_object.h
Include/internal/pycore_tstate.h
Include/internal/pycore_typeid.h [new file with mode: 0644]
Lib/test/test_sys.py
Makefile.pre.in
Misc/NEWS.d/next/Core_and_Builtins/2024-07-29-19-20-25.gh-issue-122417.NVgs0a.rst [new file with mode: 0644]
Objects/object.c
Objects/typeobject.c
PCbuild/_freeze_module.vcxproj
PCbuild/_freeze_module.vcxproj.filters
PCbuild/pythoncore.vcxproj
PCbuild/pythoncore.vcxproj.filters
Python/gc_free_threading.c
Python/pystate.c
Python/typeid.c [new file with mode: 0644]

index 90cd7b54b34161194f00e289663380d8435059b2..e1024ddbdf60625fbad2374eb3033a2b9d98e34d 100644 (file)
@@ -270,6 +270,9 @@ typedef struct _heaptypeobject {
     PyObject *ht_module;
     char *_ht_tpname;  // Storage for "tp_name"; see PyType_FromModuleAndSpec
     struct _specialization_cache _spec_cache; // For use by the specializer.
+#ifdef Py_GIL_DISABLED
+    Py_ssize_t unique_id;  // ID used for thread-local refcounting
+#endif
     /* here are optional user slots, followed by the members. */
 } PyHeapTypeObject;
 
index b4bf36e82e376adbfa9da493ed0511c30ae09806..5dd5b0c78d42fab52cfc9b0a622bbeb3aa5a72cd 100644 (file)
@@ -381,10 +381,6 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
 extern void _Py_ScheduleGC(PyThreadState *tstate);
 extern void _Py_RunGC(PyThreadState *tstate);
 
-#ifdef Py_GIL_DISABLED
-// gh-117783: Immortalize objects that use deferred reference counting
-extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp);
-#endif
 
 #ifdef __cplusplus
 }
index 4a83862ac13e268eeb433819c1d04dc20bcf89fa..a1c1dd0c9572309d515431a6250571d145545e9c 100644 (file)
@@ -35,6 +35,7 @@ extern "C" {
 #include "pycore_qsbr.h"          // struct _qsbr_state
 #include "pycore_tstate.h"        // _PyThreadStateImpl
 #include "pycore_tuple.h"         // struct _Py_tuple_state
+#include "pycore_typeid.h"        // struct _Py_type_id_pool
 #include "pycore_typeobject.h"    // struct types_state
 #include "pycore_unicodeobject.h" // struct _Py_unicode_state
 #include "pycore_warnings.h"      // struct _warnings_runtime_state
@@ -220,6 +221,7 @@ struct _is {
 #if defined(Py_GIL_DISABLED)
     struct _mimalloc_interp_state mimalloc;
     struct _brc_state brc;  // biased reference counting state
+    struct _Py_type_id_pool type_ids;
     PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
 #endif
 
index 155810d00bef5ba2f62400a74b0a242c584ebd2f..a5640b7bcb7d60faaf843a0ff160d5e778c2f4f2 100644 (file)
@@ -14,10 +14,19 @@ extern "C" {
 #include "pycore_interp.h"        // PyInterpreterState.gc
 #include "pycore_pyatomic_ft_wrappers.h"  // FT_ATOMIC_STORE_PTR_RELAXED
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
+#include "pycore_typeid.h"        // _PyType_IncrefSlow
 
 
 #define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1)
 
+// This value is added to `ob_ref_shared` for objects that use deferred
+// reference counting so that they are not immediately deallocated when the
+// non-deferred reference count drops to zero.
+//
+// The value is half the maximum shared refcount because the low two bits of
+// `ob_ref_shared` are used for flags.
+#define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8)
+
 // gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when
 // comparing the reference count to stay compatible with C extensions built
 // with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF
@@ -280,6 +289,67 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
 extern void _PyObject_FiniState(PyInterpreterState *interp);
 extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);
 
+#ifndef Py_GIL_DISABLED
+#  define _Py_INCREF_TYPE Py_INCREF
+#  define _Py_DECREF_TYPE Py_DECREF
+#else
+static inline void
+_Py_INCREF_TYPE(PyTypeObject *type)
+{
+    if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
+        assert(_Py_IsImmortal(type));
+        return;
+    }
+
+    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+    PyHeapTypeObject *ht = (PyHeapTypeObject *)type;
+
+    // Unsigned comparison so that `unique_id=-1`, which indicates that
+    // per-thread refcounting has been disabled on this type, is handled by
+    // the "else".
+    if ((size_t)ht->unique_id < (size_t)tstate->types.size) {
+#  ifdef Py_REF_DEBUG
+        _Py_INCREF_IncRefTotal();
+#  endif
+        _Py_INCREF_STAT_INC();
+        tstate->types.refcounts[ht->unique_id]++;
+    }
+    else {
+        // The slow path resizes the thread-local refcount array if necessary.
+        // It handles the unique_id=-1 case to keep the inlinable function smaller.
+        _PyType_IncrefSlow(ht);
+    }
+}
+
+static inline void
+_Py_DECREF_TYPE(PyTypeObject *type)
+{
+    if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
+        assert(_Py_IsImmortal(type));
+        return;
+    }
+
+    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+    PyHeapTypeObject *ht = (PyHeapTypeObject *)type;
+
+    // Unsigned comparison so that `unique_id=-1`, which indicates that
+    // per-thread refcounting has been disabled on this type, is handled by
+    // the "else".
+    if ((size_t)ht->unique_id < (size_t)tstate->types.size) {
+#  ifdef Py_REF_DEBUG
+        _Py_DECREF_DecRefTotal();
+#  endif
+        _Py_DECREF_STAT_INC();
+        tstate->types.refcounts[ht->unique_id]--;
+    }
+    else {
+        // Directly decref the type if the type id is not assigned or if
+        // per-thread refcounting has been disabled on this type.
+        Py_DECREF(type);
+    }
+}
+#endif
+
 /* Inline functions trading binary compatibility for speed:
    _PyObject_Init() is the fast version of PyObject_Init(), and
    _PyObject_InitVar() is the fast version of PyObject_InitVar().
@@ -291,7 +361,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj)
     assert(op != NULL);
     Py_SET_TYPE(op, typeobj);
     assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj));
-    Py_INCREF(typeobj);
+    _Py_INCREF_TYPE(typeobj);
     _Py_NewReference(op);
 }
 
index 18c972bd36759925ae664eb285d2a998f12237e4..f681b644c9ad5d948e788269f4b818c7301a1a1f 100644 (file)
@@ -31,6 +31,16 @@ typedef struct _PyThreadStateImpl {
     struct _mimalloc_thread_state mimalloc;
     struct _Py_freelists freelists;
     struct _brc_thread_state brc;
+    struct {
+        // The thread-local refcounts for heap type objects
+        Py_ssize_t *refcounts;
+
+        // Size of the refcounts array.
+        Py_ssize_t size;
+
+        // If set, don't use thread-local refcounts
+        int is_finalized;
+    } types;
 #endif
 
 #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
diff --git a/Include/internal/pycore_typeid.h b/Include/internal/pycore_typeid.h
new file mode 100644 (file)
index 0000000..e64d144
--- /dev/null
@@ -0,0 +1,75 @@
+#ifndef Py_INTERNAL_TYPEID_H
+#define Py_INTERNAL_TYPEID_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+#ifdef Py_GIL_DISABLED
+
+// This contains code for allocating unique ids to heap type objects
+// and re-using those ids when the type is deallocated.
+//
+// The type ids are used to implement per-thread reference counts of
+// heap type objects to avoid contention on the reference count fields
+// of heap type objects. Static type objects are immortal, so contention
+// is not an issue for those types.
+//
+// Type id of -1 is used to indicate a type doesn't use thread-local
+// refcounting. This value is used when a type object is finalized by the GC
+// and during interpreter shutdown to allow the type object to be
+// deallocated promptly when the object's refcount reaches zero.
+//
+// Each entry implicitly represents a type id based on its offset in the
+// table. Non-allocated entries form a free-list via the 'next' pointer.
+// Allocated entries store the corresponding PyTypeObject.
+typedef union _Py_type_id_entry {
+    // Points to the next free type id, when part of the freelist
+    union _Py_type_id_entry *next;
+
+    // Stores the type object when the id is assigned
+    PyHeapTypeObject *type;
+} _Py_type_id_entry;
+
+struct _Py_type_id_pool {
+    PyMutex mutex;
+
+    // combined table of types with allocated type ids and unallocated
+    // type ids.
+    _Py_type_id_entry *table;
+
+    // Next entry to allocate inside 'table' or NULL
+    _Py_type_id_entry *freelist;
+
+    // size of 'table'
+    Py_ssize_t size;
+};
+
+// Assigns the next id from the pool of type ids.
+extern void _PyType_AssignId(PyHeapTypeObject *type);
+
+// Releases the allocated type id back to the pool.
+extern void _PyType_ReleaseId(PyHeapTypeObject *type);
+
+// Merges the thread-local reference counts into the corresponding types.
+extern void _PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate);
+
+// Like _PyType_MergeThreadLocalRefcounts, but also frees the thread-local
+// array of refcounts.
+extern void _PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate);
+
+// Frees the interpreter's pool of type ids.
+extern void _PyType_FinalizeIdPool(PyInterpreterState *interp);
+
+// Increfs the type, resizing the thread-local refcount array if necessary.
+PyAPI_FUNC(void) _PyType_IncrefSlow(PyHeapTypeObject *type);
+
+#endif   /* Py_GIL_DISABLED */
+
+#ifdef __cplusplus
+}
+#endif
+#endif   /* !Py_INTERNAL_TYPEID_H */
index 709355e293f2fc9045113d7033f67d22e615a056..42b5a7c94e7700783fd634e96b8aa9fffe6854b6 100644 (file)
@@ -1710,6 +1710,7 @@ class SizeofTest(unittest.TestCase):
         fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
         s = vsize(fmt)
         check(int, s)
+        typeid = 'n' if support.Py_GIL_DISABLED else ''
         # class
         s = vsize(fmt +                 # PyTypeObject
                   '4P'                  # PyAsyncMethods
@@ -1718,7 +1719,8 @@ class SizeofTest(unittest.TestCase):
                   '10P'                 # PySequenceMethods
                   '2P'                  # PyBufferProcs
                   '6P'
-                  '1PIP'                 # Specializer cache
+                  '1PIP'                # Specializer cache
+                  + typeid              # heap type id (free-threaded only)
                   )
         class newstyleclass(object): pass
         # Separate block for PyDictKeysObject with 8 keys and 5 entries
index 5608e593ac9aca5f622860b692c51eef3df0237e..66b3665e9c29ae2e3c566a15f63124e98dee8efd 100644 (file)
@@ -483,6 +483,7 @@ PYTHON_OBJS=        \
                Python/thread.o \
                Python/traceback.o \
                Python/tracemalloc.o \
+               Python/typeid.o \
                Python/getopt.o \
                Python/pystrcmp.o \
                Python/pystrtod.o \
@@ -1257,6 +1258,7 @@ PYTHON_HEADERS= \
                $(srcdir)/Include/internal/pycore_tracemalloc.h \
                $(srcdir)/Include/internal/pycore_tstate.h \
                $(srcdir)/Include/internal/pycore_tuple.h \
+               $(srcdir)/Include/internal/pycore_typeid.h \
                $(srcdir)/Include/internal/pycore_typeobject.h \
                $(srcdir)/Include/internal/pycore_typevarobject.h \
                $(srcdir)/Include/internal/pycore_ucnhash.h \
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-07-29-19-20-25.gh-issue-122417.NVgs0a.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-29-19-20-25.gh-issue-122417.NVgs0a.rst
new file mode 100644 (file)
index 0000000..b050c9c
--- /dev/null
@@ -0,0 +1,4 @@
+In the free-threaded build, the reference counts for heap type objects are now
+partially stored in a distributed manner in per-thread arrays.  This reduces
+contention on the heap type's reference count fields when creating or
+destroying instances of the same type from multiple threads concurrently.
index db9d3e46795668c85e2b1048aeb439e4cc8c68e4..c6d46caa0bb62b5d628086a72000b2fc445d9b25 100644 (file)
@@ -2477,15 +2477,7 @@ _PyObject_SetDeferredRefcount(PyObject *op)
     assert(_Py_IsOwnedByCurrentThread(op));
     assert(op->ob_ref_shared == 0);
     _PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED);
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) {
-        // gh-117696: immortalize objects instead of using deferred reference
-        // counting for now.
-        _Py_SetImmortal(op);
-        return;
-    }
-    op->ob_ref_local += 1;
-    op->ob_ref_shared = _Py_REF_QUEUED;
+    op->ob_ref_shared = _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
 #endif
 }
 
index a2d82e65b6ad9f11551a1a5c7b6f36b47bf31281..00f0dc9849b5c877bce9733d1acfb42d4ad7494e 100644 (file)
@@ -2452,7 +2452,7 @@ subtype_dealloc(PyObject *self)
            reference counting. Only decref if the base type is not already a heap
            allocated type. Otherwise, basedealloc should have decref'd it already */
         if (type_needs_decref) {
-            Py_DECREF(type);
+            _Py_DECREF_TYPE(type);
         }
 
         /* Done */
@@ -2562,7 +2562,7 @@ subtype_dealloc(PyObject *self)
        reference counting. Only decref if the base type is not already a heap
        allocated type. Otherwise, basedealloc should have decref'd it already */
     if (type_needs_decref) {
-        Py_DECREF(type);
+        _Py_DECREF_TYPE(type);
     }
 
   endlabel:
@@ -3913,7 +3913,9 @@ type_new_alloc(type_new_ctx *ctx)
     et->ht_module = NULL;
     et->_ht_tpname = NULL;
 
-    _PyObject_SetDeferredRefcount((PyObject *)et);
+#ifdef Py_GIL_DISABLED
+    _PyType_AssignId(et);
+#endif
 
     return type;
 }
@@ -4965,6 +4967,11 @@ _PyType_FromMetaclass_impl(
     type->tp_weaklistoffset = weaklistoffset;
     type->tp_dictoffset = dictoffset;
 
+#ifdef Py_GIL_DISABLED
+    // Assign a type id to enable thread-local refcounting
+    _PyType_AssignId(res);
+#endif
+
     /* Ready the type (which includes inheritance).
      *
      * After this call we should generally only touch up what's
@@ -5914,6 +5921,9 @@ type_dealloc(PyObject *self)
     }
     Py_XDECREF(et->ht_module);
     PyMem_Free(et->_ht_tpname);
+#ifdef Py_GIL_DISABLED
+    _PyType_ReleaseId(et);
+#endif
     Py_TYPE(type)->tp_free((PyObject *)type);
 }
 
index e5e18de60ec3490d62ea8fbfc867af05d4a1beab..962d754e4a121df3df6375d6d26f8b43c39dbaf3 100644 (file)
     <ClCompile Include="..\Python\thread.c" />
     <ClCompile Include="..\Python\traceback.c" />
     <ClCompile Include="..\Python\tracemalloc.c" />
+    <ClCompile Include="..\Python\typeid.c" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\PC\pyconfig.h.in" />
index 9630f54ae4ea29bc481fedccf1cf92913a1b1199..86146f73857bd4f7f9fe663380b65fd935ab766a 100644 (file)
     <ClCompile Include="..\Python\tracemalloc.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\Python\typeid.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
     <ClCompile Include="..\Objects\tupleobject.c">
       <Filter>Source Files</Filter>
     </ClCompile>
index 9e3af689f4a2888bcf538d83389ddd0d3ebcbba5..7991eb93aa2c8ab7a97571acc9f6a435b5c1fb6f 100644 (file)
     <ClInclude Include="..\Include\internal\pycore_tracemalloc.h" />
     <ClInclude Include="..\Include\internal\pycore_tstate.h" />
     <ClInclude Include="..\Include\internal\pycore_tuple.h" />
+    <ClInclude Include="..\Include\internal\pycore_typeid.h" />
     <ClInclude Include="..\Include\internal\pycore_typeobject.h" />
     <ClInclude Include="..\Include\internal\pycore_typevarobject.h" />
     <ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
     <ClCompile Include="..\Python\thread.c" />
     <ClCompile Include="..\Python\traceback.c" />
     <ClCompile Include="..\Python\tracemalloc.c" />
+    <ClCompile Include="..\Python\typeid.c" />
   </ItemGroup>
   <ItemGroup Condition="$(IncludeExternals)">
     <ClCompile Include="..\Modules\zlibmodule.c" />
index 31f7971bda845d5422a39fb73429e02e2bf81120..075910915fb912fe61b9154f3d9de74db08e1a2c 100644 (file)
     <ClInclude Include="..\Include\internal\pycore_tuple.h">
       <Filter>Include\internal</Filter>
     </ClInclude>
+    <ClInclude Include="..\Include\internal\pycore_typeid.h">
+      <Filter>Include\internal</Filter>
+    </ClInclude>
     <ClInclude Include="..\Include\internal\pycore_typeobject.h">
       <Filter>Include\internal</Filter>
     </ClInclude>
     <ClCompile Include="..\Python\tracemalloc.c">
       <Filter>Python</Filter>
     </ClCompile>
+    <ClCompile Include="..\Python\typeid.c">
+      <Filter>Python</Filter>
+    </ClCompile>
     <ClCompile Include="..\Python\bootstrap_hash.c">
       <Filter>Python</Filter>
     </ClCompile>
index 53f04160c38841da791f04113c156e053a69ea85..1e02db00649c750c1a7f9f00c4c70d611f81a50a 100644 (file)
@@ -15,6 +15,7 @@
 #include "pycore_tstate.h"        // _PyThreadStateImpl
 #include "pycore_weakref.h"       // _PyWeakref_ClearRef()
 #include "pydtrace.h"
+#include "pycore_typeid.h"        // _PyType_MergeThreadLocalRefcounts
 
 #ifdef Py_GIL_DISABLED
 
@@ -164,7 +165,15 @@ disable_deferred_refcounting(PyObject *op)
 {
     if (_PyObject_HasDeferredRefcount(op)) {
         op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
-        op->ob_ref_shared -= (1 << _Py_REF_SHARED_SHIFT);
+        op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
+
+        if (PyType_Check(op)) {
+            // Disable thread-local refcounting for heap types
+            PyTypeObject *type = (PyTypeObject *)op;
+            if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
+                _PyType_ReleaseId((PyHeapTypeObject *)op);
+            }
+        }
     }
 }
 
@@ -328,16 +337,6 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
     }
 }
 
-static void
-merge_all_queued_objects(PyInterpreterState *interp, struct collection_state *state)
-{
-    HEAD_LOCK(&_PyRuntime);
-    for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
-        merge_queued_objects((_PyThreadStateImpl *)p, state);
-    }
-    HEAD_UNLOCK(&_PyRuntime);
-}
-
 static void
 process_delayed_frees(PyInterpreterState *interp)
 {
@@ -389,7 +388,9 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
     }
 
     Py_ssize_t refcount = Py_REFCNT(op);
-    refcount -= _PyObject_HasDeferredRefcount(op);
+    if (_PyObject_HasDeferredRefcount(op)) {
+        refcount -= _Py_REF_DEFERRED;
+    }
     _PyObject_ASSERT(op, refcount >= 0);
 
     if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) {
@@ -754,10 +755,6 @@ _PyGC_Init(PyInterpreterState *interp)
 {
     GCState *gcstate = &interp->gc;
 
-    // gh-117783: immortalize objects that would use deferred refcounting
-    // once the first non-main thread is created (but not in subinterpreters).
-    gcstate->immortalize = _Py_IsMainInterpreter(interp) ? 0 : -1;
-
     gcstate->garbage = PyList_New(0);
     if (gcstate->garbage == NULL) {
         return _PyStatus_NO_MEMORY();
@@ -1105,8 +1102,18 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
         state->gcstate->old[i-1].count = 0;
     }
 
-    // merge refcounts for all queued objects
-    merge_all_queued_objects(interp, state);
+    HEAD_LOCK(&_PyRuntime);
+    for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
+        _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p;
+
+        // merge per-thread refcount for types into the type's actual refcount
+        _PyType_MergeThreadLocalRefcounts(tstate);
+
+        // merge refcounts for all queued objects
+        merge_queued_objects(tstate, state);
+    }
+    HEAD_UNLOCK(&_PyRuntime);
+
     process_delayed_frees(interp);
 
     // Find unreachable objects
@@ -1835,32 +1842,6 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area,
     return true;
 }
 
-// gh-117783: Immortalize objects that use deferred reference counting to
-// temporarily work around scaling bottlenecks.
-static bool
-immortalize_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
-                    void *block, size_t block_size, void *args)
-{
-    PyObject *op = op_from_block(block, args, false);
-    if (op != NULL && _PyObject_HasDeferredRefcount(op)) {
-        _Py_SetImmortal(op);
-        op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
-    }
-    return true;
-}
-
-void
-_PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp)
-{
-    struct visitor_args args;
-    _PyEval_StopTheWorld(interp);
-    if (interp->gc.immortalize == 0) {
-        gc_visit_heaps(interp, &immortalize_visitor, &args);
-        interp->gc.immortalize = 1;
-    }
-    _PyEval_StartTheWorld(interp);
-}
-
 void
 PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
 {
index 6fbd17f7eaeaa999025152639fc4220dbeb8e492..8f4818cee00d9dd97a571f701eaddde6d9b74478 100644 (file)
@@ -20,6 +20,7 @@
 #include "pycore_runtime_init.h"  // _PyRuntimeState_INIT
 #include "pycore_sysmodule.h"     // _PySys_Audit()
 #include "pycore_obmalloc.h"      // _PyMem_obmalloc_state_on_heap()
+#include "pycore_typeid.h"        // _PyType_FinalizeIdPool
 
 /* --------------------------------------------------------------------------
 CAUTION
@@ -1584,13 +1585,6 @@ new_threadstate(PyInterpreterState *interp, int whence)
         PyMem_RawFree(new_tstate);
     }
     else {
-#ifdef Py_GIL_DISABLED
-        if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) {
-            // Immortalize objects marked as using deferred reference counting
-            // the first time a non-main thread is created.
-            _PyGC_ImmortalizeDeferredObjects(interp);
-        }
-#endif
     }
 
 #ifdef Py_GIL_DISABLED
@@ -1741,6 +1735,10 @@ PyThreadState_Clear(PyThreadState *tstate)
     struct _Py_freelists *freelists = _Py_freelists_GET();
     _PyObject_ClearFreeLists(freelists, 1);
 
+    // Merge our thread-local refcounts into the type's own refcount and
+    // free our local refcount array.
+    _PyType_FinalizeThreadLocalRefcounts((_PyThreadStateImpl *)tstate);
+
     // Remove ourself from the biased reference counting table of threads.
     _Py_brc_remove_thread(tstate);
 #endif
@@ -1799,6 +1797,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
     _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
     tstate->interp->object_state.reftotal += tstate_impl->reftotal;
     tstate_impl->reftotal = 0;
+    assert(tstate_impl->types.refcounts == NULL);
 #endif
 
     HEAD_UNLOCK(runtime);
diff --git a/Python/typeid.c b/Python/typeid.c
new file mode 100644 (file)
index 0000000..83a6872
--- /dev/null
@@ -0,0 +1,200 @@
+#include "Python.h"
+
+#include "pycore_lock.h"        // PyMutex_LockFlags()
+#include "pycore_pystate.h"     // _PyThreadState_GET()
+#include "pycore_object.h"      // _Py_IncRefTotal
+#include "pycore_typeid.h"
+
+// This contains code for allocating unique ids to heap type objects
+// and re-using those ids when the type is deallocated.
+//
+// See Include/internal/pycore_typeid.h for more details.
+
+#ifdef Py_GIL_DISABLED
+
+#define POOL_MIN_SIZE 8
+
+#define LOCK_POOL(pool) PyMutex_LockFlags(&pool->mutex, _Py_LOCK_DONT_DETACH)
+#define UNLOCK_POOL(pool) PyMutex_Unlock(&pool->mutex)
+
+static int
+resize_interp_type_id_pool(struct _Py_type_id_pool *pool)
+{
+    if ((size_t)pool->size > PY_SSIZE_T_MAX / (2 * sizeof(*pool->table))) {
+        return -1;
+    }
+
+    Py_ssize_t new_size = pool->size * 2;
+    if (new_size < POOL_MIN_SIZE) {
+        new_size = POOL_MIN_SIZE;
+    }
+
+    _Py_type_id_entry *table = PyMem_Realloc(pool->table,
+                                             new_size * sizeof(*pool->table));
+    if (table == NULL) {
+        return -1;
+    }
+
+    Py_ssize_t start = pool->size;
+    for (Py_ssize_t i = start; i < new_size - 1; i++) {
+        table[i].next = &table[i + 1];
+    }
+    table[new_size - 1].next = NULL;
+
+    pool->table = table;
+    pool->freelist = &table[start];
+    _Py_atomic_store_ssize(&pool->size, new_size);
+    return 0;
+}
+
+static int
+resize_local_refcounts(_PyThreadStateImpl *tstate)
+{
+    if (tstate->types.is_finalized) {
+        return -1;
+    }
+
+    struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids;
+    Py_ssize_t size = _Py_atomic_load_ssize(&pool->size);
+
+    Py_ssize_t *refcnts = PyMem_Realloc(tstate->types.refcounts,
+                                        size * sizeof(Py_ssize_t));
+    if (refcnts == NULL) {
+        return -1;
+    }
+
+    Py_ssize_t old_size = tstate->types.size;
+    if (old_size < size) {
+       memset(refcnts + old_size, 0, (size - old_size) * sizeof(Py_ssize_t));
+    }
+
+    tstate->types.refcounts = refcnts;
+    tstate->types.size = size;
+    return 0;
+}
+
+void
+_PyType_AssignId(PyHeapTypeObject *type)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    struct _Py_type_id_pool *pool = &interp->type_ids;
+
+    LOCK_POOL(pool);
+    if (pool->freelist == NULL) {
+        if (resize_interp_type_id_pool(pool) < 0) {
+            type->unique_id = -1;
+            UNLOCK_POOL(pool);
+            return;
+        }
+    }
+
+    _Py_type_id_entry *entry = pool->freelist;
+    pool->freelist = entry->next;
+    entry->type = type;
+    _PyObject_SetDeferredRefcount((PyObject *)type);
+    type->unique_id = (entry - pool->table);
+    UNLOCK_POOL(pool);
+}
+
+void
+_PyType_ReleaseId(PyHeapTypeObject *type)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    struct _Py_type_id_pool *pool = &interp->type_ids;
+
+    if (type->unique_id < 0) {
+        // The type doesn't have an id assigned.
+        return;
+    }
+
+    LOCK_POOL(pool);
+    _Py_type_id_entry *entry = &pool->table[type->unique_id];
+    assert(entry->type == type);
+    entry->next = pool->freelist;
+    pool->freelist = entry;
+
+    type->unique_id = -1;
+    UNLOCK_POOL(pool);
+}
+
+void
+_PyType_IncrefSlow(PyHeapTypeObject *type)
+{
+    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+    if (type->unique_id < 0 || resize_local_refcounts(tstate) < 0) {
+        // just incref the type directly.
+        Py_INCREF(type);
+        return;
+    }
+
+    assert(type->unique_id < tstate->types.size);
+    tstate->types.refcounts[type->unique_id]++;
+#ifdef Py_REF_DEBUG
+    _Py_IncRefTotal((PyThreadState *)tstate);
+#endif
+    _Py_INCREF_STAT_INC();
+}
+
+void
+_PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate)
+{
+    if (tstate->types.refcounts == NULL) {
+        return;
+    }
+
+    struct _Py_type_id_pool *pool = &tstate->base.interp->type_ids;
+
+    LOCK_POOL(pool);
+    for (Py_ssize_t i = 0, n = tstate->types.size; i < n; i++) {
+        Py_ssize_t refcnt = tstate->types.refcounts[i];
+        if (refcnt != 0) {
+            PyObject *type = (PyObject *)pool->table[i].type;
+            assert(PyType_Check(type));
+
+            _Py_atomic_add_ssize(&type->ob_ref_shared,
+                                 refcnt << _Py_REF_SHARED_SHIFT);
+            tstate->types.refcounts[i] = 0;
+        }
+    }
+    UNLOCK_POOL(pool);
+}
+
+void
+_PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate)
+{
+    _PyType_MergeThreadLocalRefcounts(tstate);
+
+    PyMem_Free(tstate->types.refcounts);
+    tstate->types.refcounts = NULL;
+    tstate->types.size = 0;
+    tstate->types.is_finalized = 1;
+}
+
+void
+_PyType_FinalizeIdPool(PyInterpreterState *interp)
+{
+    struct _Py_type_id_pool *pool = &interp->type_ids;
+
+    // First, set the free-list to NULL values
+    while (pool->freelist) {
+        _Py_type_id_entry *next = pool->freelist->next;
+        pool->freelist->type = NULL;
+        pool->freelist = next;
+    }
+
+    // Now everything non-NULL is a type. Set the type's id to -1 in case it
+    // outlives the interpreter.
+    for (Py_ssize_t i = 0; i < pool->size; i++) {
+        PyHeapTypeObject *ht = pool->table[i].type;
+        if (ht) {
+            ht->unique_id = -1;
+            pool->table[i].type = NULL;
+        }
+    }
+    PyMem_Free(pool->table);
+    pool->table = NULL;
+    pool->freelist = NULL;
+    pool->size = 0;
+}
+
+#endif   /* Py_GIL_DISABLED */