git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-110481: Implement biased reference counting (gh-110764)
author Sam Gross <colesbury@gmail.com>
Mon, 30 Oct 2023 16:06:09 +0000 (12:06 -0400)
committer GitHub <noreply@github.com>
Mon, 30 Oct 2023 16:06:09 +0000 (16:06 +0000)
29 files changed:
Include/internal/pycore_long.h
Include/internal/pycore_object.h
Include/internal/pycore_runtime_init.h
Include/object.h
Lib/test/support/__init__.py
Misc/NEWS.d/next/Core and Builtins/2023-10-12-12-09-01.gh-issue-110481.3Er3it.rst [new file with mode: 0644]
Modules/_ctypes/_ctypes_test.c
Modules/_multiprocessing/posixshmem.c
Modules/_scproxy.c
Modules/_stat.c
Modules/_testcapi/heaptype_relative.c
Modules/_testcapi/vectorcall_limited.c
Modules/_testclinic_limited.c
Modules/_testimportmultiple.c
Modules/_uuidmodule.c
Modules/errnomodule.c
Modules/md5module.c
Modules/resource.c
Modules/xxlimited.c
Modules/xxlimited_35.c
Objects/object.c
Objects/setobject.c
Objects/sliceobject.c
Objects/unicodeobject.c
PC/winsound.c
Python/ceval.c
Python/instrumentation.c
Python/specialize.c
Tools/build/deepfreeze.py

index ddf79ab1d130e8bbafeb65ea03d84e470c093e53..c4ef6eff37b19948876d60fc26c0840117c20f69 100644 (file)
@@ -317,7 +317,7 @@ _PyLong_FlipSign(PyLongObject *op) {
 
 #define _PyLong_DIGIT_INIT(val) \
     { \
-        .ob_base = _PyObject_HEAD_INIT(&PyLong_Type) \
+        .ob_base = _PyObject_HEAD_INIT(&PyLong_Type), \
         .long_value  = { \
             .lv_tag = TAG_FROM_SIGN_AND_SIZE( \
                 (val) == 0 ? 0 : ((val) < 0 ? -1 : 1), \
index 2d50f42c9c614df2832530f2cff4a3216f0557c1..206d8a5d4cc5e13387fecab4e725bb42326b63bb 100644 (file)
@@ -54,16 +54,24 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *);
    Furthermore, we can't use designated initializers in Extensions since these
    are not supported pre-C++20. Thus, keeping an internal copy here is the most
    backwards compatible solution */
+#if defined(Py_NOGIL)
+#define _PyObject_HEAD_INIT(type)                   \
+    {                                               \
+        .ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL,  \
+        .ob_type = (type)                           \
+    }
+#else
 #define _PyObject_HEAD_INIT(type)         \
     {                                     \
         .ob_refcnt = _Py_IMMORTAL_REFCNT, \
         .ob_type = (type)                 \
-    },
+    }
+#endif
 #define _PyVarObject_HEAD_INIT(type, size)    \
     {                                         \
-        .ob_base = _PyObject_HEAD_INIT(type)  \
+        .ob_base = _PyObject_HEAD_INIT(type), \
         .ob_size = size                       \
-    },
+    }
 
 extern void _Py_NO_RETURN _Py_FatalRefcountErrorFunc(
     const char *func,
@@ -95,24 +103,63 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n)
 #ifdef Py_REF_DEBUG
     _Py_AddRefTotal(_PyInterpreterState_GET(), n);
 #endif
+#if !defined(Py_NOGIL)
     op->ob_refcnt += n;
+#else
+    if (_Py_IsOwnedByCurrentThread(op)) {
+        uint32_t local = op->ob_ref_local;
+        Py_ssize_t refcnt = (Py_ssize_t)local + n;
+#  if PY_SSIZE_T_MAX > UINT32_MAX
+        if (refcnt > (Py_ssize_t)UINT32_MAX) {
+            // Make the object immortal if the 32-bit local reference count
+            // would overflow.
+            refcnt = _Py_IMMORTAL_REFCNT_LOCAL;
+        }
+#  endif
+        _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, (uint32_t)refcnt);
+    }
+    else {
+        _Py_atomic_add_ssize(&op->ob_ref_shared, (n << _Py_REF_SHARED_SHIFT));
+    }
+#endif
 }
 #define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n)
 
 static inline void _Py_SetImmortal(PyObject *op)
 {
     if (op) {
+#ifdef Py_NOGIL
+        op->ob_tid = _Py_UNOWNED_TID;  // immortal objects are not owned by any thread
+        op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;  // Py_NOGIL builds mark immortality in the local count
+        op->ob_ref_shared = 0;  // clears both the shared count and its flag bits
+#else
         op->ob_refcnt = _Py_IMMORTAL_REFCNT;
+#endif
     }
 }
 #define _Py_SetImmortal(op) _Py_SetImmortal(_PyObject_CAST(op))
 
+// Makes an immortal object mortal again with the specified refcnt. Should only
+// be used during runtime finalization. A NULL `op` is ignored.
+static inline void _Py_SetMortal(PyObject *op, Py_ssize_t refcnt)
+{
+    if (op) {
+        assert(_Py_IsImmortal(op));  // only valid on objects that are currently immortal
+#ifdef Py_NOGIL
+        op->ob_tid = _Py_UNOWNED_TID;  // the object stays unowned
+        op->ob_ref_local = 0;  // drops the immortal marker held in the local count
+        op->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED);  // whole count goes in the shared field, flagged as merged
+#else
+        op->ob_refcnt = refcnt;
+#endif
+    }
+}
+
 /* _Py_ClearImmortal() should only be used during runtime finalization. */
 static inline void _Py_ClearImmortal(PyObject *op)
 {
     if (op) {
-        assert(op->ob_refcnt == _Py_IMMORTAL_REFCNT);
-        op->ob_refcnt = 1;
+        _Py_SetMortal(op, 1);  // make mortal with exactly one reference; the Py_DECREF below releases it
         Py_DECREF(op);
     }
 }
@@ -122,6 +169,7 @@ static inline void _Py_ClearImmortal(PyObject *op)
         op = NULL; \
     } while (0)
 
+#if !defined(Py_NOGIL)
 static inline void
 _Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
 {
@@ -161,6 +209,37 @@ _Py_DECREF_NO_DEALLOC(PyObject *op)
 #endif
 }
 
+#else
+// TODO: implement Py_DECREF specializations for Py_NOGIL build
+static inline void
+_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
+{
+    Py_DECREF(op);  // placeholder: `destruct` is ignored and the generic Py_DECREF is used
+}
+
+static inline void
+_Py_DECREF_NO_DEALLOC(PyObject *op)
+{
+    Py_DECREF(op);  // placeholder for Py_NOGIL builds: simply forwards to the generic Py_DECREF
+}
+
+static inline int
+_Py_REF_IS_MERGED(Py_ssize_t ob_ref_shared)  // takes the raw ob_ref_shared value (count plus flag bits)
+{
+    return (ob_ref_shared & _Py_REF_SHARED_FLAG_MASK) == _Py_REF_MERGED;  // nonzero iff the flag bits equal _Py_REF_MERGED
+}
+
+static inline int
+_Py_REF_IS_QUEUED(Py_ssize_t ob_ref_shared)  // takes the raw ob_ref_shared value (count plus flag bits)
+{
+    return (ob_ref_shared & _Py_REF_SHARED_FLAG_MASK) == _Py_REF_QUEUED;  // nonzero iff the flag bits equal _Py_REF_QUEUED
+}
+
+// Merge the local and shared reference count fields and add `extra` to the
+// refcount when merging. Returns the resulting refcount; if the fields were already merged, returns the current shared count without adding `extra`.
+Py_ssize_t _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra);
+#endif // !defined(Py_NOGIL)
+
 #ifdef Py_REF_DEBUG
 #  undef _Py_DEC_REFTOTAL
 #endif
index 73fa5f553cb81b7e0988c6c3e86db818870db30d..0799b7e701ce954313d42fe2bd2f1732d7fe1a2f 100644 (file)
@@ -129,13 +129,13 @@ extern PyTypeObject _PyExc_MemoryError;
                     .latin1 = _Py_str_latin1_INIT, \
                 }, \
                 .tuple_empty = { \
-                    .ob_base = _PyVarObject_HEAD_INIT(&PyTuple_Type, 0) \
+                    .ob_base = _PyVarObject_HEAD_INIT(&PyTuple_Type, 0), \
                 }, \
                 .hamt_bitmap_node_empty = { \
-                    .ob_base = _PyVarObject_HEAD_INIT(&_PyHamt_BitmapNode_Type, 0) \
+                    .ob_base = _PyVarObject_HEAD_INIT(&_PyHamt_BitmapNode_Type, 0), \
                 }, \
                 .context_token_missing = { \
-                    .ob_base = _PyObject_HEAD_INIT(&_PyContextTokenMissing_Type) \
+                    .ob_base = _PyObject_HEAD_INIT(&_PyContextTokenMissing_Type), \
                 }, \
             }, \
         }, \
@@ -172,11 +172,11 @@ extern PyTypeObject _PyExc_MemoryError;
             .singletons = { \
                 ._not_used = 1, \
                 .hamt_empty = { \
-                    .ob_base = _PyObject_HEAD_INIT(&_PyHamt_Type) \
+                    .ob_base = _PyObject_HEAD_INIT(&_PyHamt_Type), \
                     .h_root = (PyHamtNode*)&_Py_SINGLETON(hamt_bitmap_node_empty), \
                 }, \
                 .last_resort_memory_error = { \
-                    _PyObject_HEAD_INIT(&_PyExc_MemoryError) \
+                    _PyObject_HEAD_INIT(&_PyExc_MemoryError), \
                     .args = (PyObject*)&_Py_SINGLETON(tuple_empty) \
                 }, \
             }, \
@@ -206,7 +206,7 @@ extern PyTypeObject _PyExc_MemoryError;
 
 #define _PyBytes_SIMPLE_INIT(CH, LEN) \
     { \
-        _PyVarObject_HEAD_INIT(&PyBytes_Type, (LEN)) \
+        _PyVarObject_HEAD_INIT(&PyBytes_Type, (LEN)), \
         .ob_shash = -1, \
         .ob_sval = { (CH) }, \
     }
@@ -217,7 +217,7 @@ extern PyTypeObject _PyExc_MemoryError;
 
 #define _PyUnicode_ASCII_BASE_INIT(LITERAL, ASCII) \
     { \
-        .ob_base = _PyObject_HEAD_INIT(&PyUnicode_Type) \
+        .ob_base = _PyObject_HEAD_INIT(&PyUnicode_Type), \
         .length = sizeof(LITERAL) - 1, \
         .hash = -1, \
         .state = { \
index 9058558e3cd4d9659cfdf68ea499e1462fb6af62..6f116ef35a790c627f4631b8b5b2699a96546c88 100644 (file)
@@ -106,9 +106,26 @@ check by comparing the reference count field to the immortality reference count.
 #define _Py_IMMORTAL_REFCNT (UINT_MAX >> 2)
 #endif
 
+// Py_NOGIL builds indicate immortal objects using `ob_ref_local`, which is
+// always 32 bits wide.
+#ifdef Py_NOGIL
+#define _Py_IMMORTAL_REFCNT_LOCAL UINT32_MAX
+#endif
+
 // Make all internal uses of PyObject_HEAD_INIT immortal while preserving the
 // C-API expectation that the refcnt will be set to 1.
-#ifdef Py_BUILD_CORE
+#if defined(Py_NOGIL)
+#define PyObject_HEAD_INIT(type)    \
+    {                               \
+        0,                          \
+        0,                          \
+        0,                          \
+        0,                          \
+        _Py_IMMORTAL_REFCNT_LOCAL,  \
+        0,                          \
+        (type),                     \
+    },
+#elif defined(Py_BUILD_CORE)
 #define PyObject_HEAD_INIT(type)    \
     {                               \
         { _Py_IMMORTAL_REFCNT },    \
@@ -142,6 +159,7 @@ check by comparing the reference count field to the immortality reference count.
  * by hand.  Similarly every pointer to a variable-size Python object can,
  * in addition, be cast to PyVarObject*.
  */
+#ifndef Py_NOGIL
 struct _object {
 #if (defined(__GNUC__) || defined(__clang__)) \
         && !(defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L)
@@ -166,6 +184,36 @@ struct _object {
 
     PyTypeObject *ob_type;
 };
+#else
+// Objects that are not owned by any thread use a thread id (tid) of zero.
+// This includes both immortal objects and objects whose reference count
+// fields have been merged.
+#define _Py_UNOWNED_TID             0
+
+// The shared reference count uses the two least-significant bits to store
+// flags. The remaining bits are used to store the reference count.
+#define _Py_REF_SHARED_SHIFT        2
+#define _Py_REF_SHARED_FLAG_MASK    0x3
+
+// The shared flags are initialized to zero.
+#define _Py_REF_SHARED_INIT         0x0
+#define _Py_REF_MAYBE_WEAKREF       0x1
+#define _Py_REF_QUEUED              0x2
+#define _Py_REF_MERGED              0x3
+
+// Create a shared field from a refcnt and desired flags
+#define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags))
+
+struct _object {
+    uintptr_t ob_tid;           // owning thread id (_Py_UNOWNED_TID, i.e. zero, when unowned)
+    uint16_t _padding;          // NOTE(review): appears to be unused filler; new_reference() zeroes it
+    uint8_t ob_mutex;           // per-object lock
+    uint8_t ob_gc_bits;         // gc-related state
+    uint32_t ob_ref_local;      // local reference count; _Py_IMMORTAL_REFCNT_LOCAL marks an immortal object
+    Py_ssize_t ob_ref_shared;   // shared (atomic) reference count; the two low bits hold the _Py_REF_* flags
+    PyTypeObject *ob_type;
+};
+#endif
 
 /* Cast argument to PyObject* type. */
 #define _PyObject_CAST(op) _Py_CAST(PyObject*, (op))
@@ -183,9 +231,56 @@ typedef struct {
 PyAPI_FUNC(int) Py_Is(PyObject *x, PyObject *y);
 #define Py_Is(x, y) ((x) == (y))
 
+#ifndef Py_LIMITED_API
+static inline uintptr_t
+_Py_ThreadId(void)
+{
+    uintptr_t tid;
+#if defined(_MSC_VER) && defined(_M_X64)
+    tid = __readgsqword(48);
+#elif defined(_MSC_VER) && defined(_M_IX86)
+    tid = __readfsdword(24);
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+    tid = __getReg(18);
+#elif defined(__i386__)
+    __asm__("movl %%gs:0, %0" : "=r" (tid));  // 32-bit always uses GS
+#elif defined(__MACH__) && defined(__x86_64__)
+    __asm__("movq %%gs:0, %0" : "=r" (tid));  // x86_64 macOSX uses GS
+#elif defined(__x86_64__)
+   __asm__("movq %%fs:0, %0" : "=r" (tid));  // x86_64 Linux, BSD uses FS
+#elif defined(__arm__)
+    __asm__ ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tid));
+#elif defined(__aarch64__) && defined(__APPLE__)
+    __asm__ ("mrs %0, tpidrro_el0" : "=r" (tid));
+#elif defined(__aarch64__)
+    __asm__ ("mrs %0, tpidr_el0" : "=r" (tid));
+#else
+  # error "define _Py_ThreadId for this platform"
+#endif
+  return tid;
+}
+#endif
+
+#if defined(Py_NOGIL) && !defined(Py_LIMITED_API)
+static inline Py_ALWAYS_INLINE int
+_Py_IsOwnedByCurrentThread(PyObject *ob)  // nonzero iff ob's recorded owner is the calling thread
+{
+    return ob->ob_tid == _Py_ThreadId();  // plain (non-atomic) read of ob_tid
+}
+#endif
 
 static inline Py_ssize_t Py_REFCNT(PyObject *ob) {
+#if !defined(Py_NOGIL)
     return ob->ob_refcnt;
+#else
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local);
+    if (local == _Py_IMMORTAL_REFCNT_LOCAL) {
+        return _Py_IMMORTAL_REFCNT;  // immortal objects always report this fixed value
+    }
+    Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared);  // separate load: local and shared are not read as one atomic snapshot
+    return _Py_STATIC_CAST(Py_ssize_t, local) +  // total = local count + shared count with the flag bits shifted out
+           Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT);
+#endif
 }
 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
 #  define Py_REFCNT(ob) Py_REFCNT(_PyObject_CAST(ob))
@@ -216,7 +311,9 @@ static inline Py_ssize_t Py_SIZE(PyObject *ob) {
 
 static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op)
 {
-#if SIZEOF_VOID_P > 4
+#if defined(Py_NOGIL)
+    return op->ob_ref_local == _Py_IMMORTAL_REFCNT_LOCAL;
+#elif SIZEOF_VOID_P > 4
     return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0;
 #else
     return op->ob_refcnt == _Py_IMMORTAL_REFCNT;
@@ -240,7 +337,24 @@ static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) {
     if (_Py_IsImmortal(ob)) {
         return;
     }
+#if !defined(Py_NOGIL)
     ob->ob_refcnt = refcnt;
+#else
+    if (_Py_IsOwnedByCurrentThread(ob)) {
+        // Set local refcount to desired refcount and shared refcount to zero,
+        // but preserve the shared refcount flags.
+        assert(refcnt < UINT32_MAX);
+        ob->ob_ref_local = _Py_STATIC_CAST(uint32_t, refcnt);
+        ob->ob_ref_shared &= _Py_REF_SHARED_FLAG_MASK;
+    }
+    else {
+        // Set local refcount to zero and shared refcount to desired refcount.
+        // Mark the object as merged.
+        ob->ob_tid = _Py_UNOWNED_TID;
+        ob->ob_ref_local = 0;
+        ob->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED);
+    }
+#endif
 }
 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
 #  define Py_SET_REFCNT(ob, refcnt) Py_SET_REFCNT(_PyObject_CAST(ob), (refcnt))
@@ -618,7 +732,19 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op)
 #else
     // Non-limited C API and limited C API for Python 3.9 and older access
     // directly PyObject.ob_refcnt.
-#if SIZEOF_VOID_P > 4
+#if defined(Py_NOGIL)
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
+    uint32_t new_local = local + 1;
+    if (new_local == 0) {
+        return;
+    }
+    if (_Py_IsOwnedByCurrentThread(op)) {
+        _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, new_local);
+    }
+    else {
+        _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT));
+    }
+#elif SIZEOF_VOID_P > 4
     // Portable saturated add, branching on the carry flag and set low bits
     PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN];
     PY_UINT32_T new_refcnt = cur_refcnt + 1;
@@ -643,6 +769,19 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op)
 #  define Py_INCREF(op) Py_INCREF(_PyObject_CAST(op))
 #endif
 
+
+#if !defined(Py_LIMITED_API) && defined(Py_NOGIL)
+// Implements Py_DECREF on objects not owned by the current thread.
+PyAPI_FUNC(void) _Py_DecRefShared(PyObject *);
+PyAPI_FUNC(void) _Py_DecRefSharedDebug(PyObject *, const char *, int);
+
+// Called from Py_DECREF by the owning thread when the local refcount reaches
+// zero. The call will deallocate the object if the shared refcount is also
+// zero. Otherwise, the thread gives up ownership and merges the reference
+// count fields.
+PyAPI_FUNC(void) _Py_MergeZeroLocalRefcount(PyObject *);
+#endif
+
 #if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG))
 // Stable ABI implements Py_DECREF() as a function call on limited C API
 // version 3.12 and newer, and on Python built in debug mode. _Py_DecRef() was
@@ -657,6 +796,52 @@ static inline void Py_DECREF(PyObject *op) {
 }
 #define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op))
 
+#elif defined(Py_NOGIL) && defined(Py_REF_DEBUG)
+static inline void Py_DECREF(const char *filename, int lineno, PyObject *op)  // Py_NOGIL + Py_REF_DEBUG variant; filename/lineno identify the call site
+{
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
+    if (local == _Py_IMMORTAL_REFCNT_LOCAL) {
+        return;  // immortal objects are never decremented
+    }
+    _Py_DECREF_STAT_INC();
+    _Py_DECREF_DecRefTotal();
+    if (_Py_IsOwnedByCurrentThread(op)) {
+        if (local == 0) {
+            _Py_NegativeRefcount(filename, lineno, op);  // the local count is already zero, so the decrement below would wrap
+        }
+        local--;
+        _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local);
+        if (local == 0) {
+            _Py_MergeZeroLocalRefcount(op);  // local count hit zero: deallocate, or give up ownership and merge
+        }
+    }
+    else {
+        _Py_DecRefSharedDebug(op, filename, lineno);  // not the owner: decrement the shared count instead
+    }
+}
+#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op))
+
+#elif defined(Py_NOGIL)
+static inline void Py_DECREF(PyObject *op)  // Py_NOGIL variant without Py_REF_DEBUG checks
+{
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
+    if (local == _Py_IMMORTAL_REFCNT_LOCAL) {
+        return;  // immortal objects are never decremented
+    }
+    _Py_DECREF_STAT_INC();
+    if (_Py_IsOwnedByCurrentThread(op)) {
+        local--;  // no underflow check in this variant
+        _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local);
+        if (local == 0) {
+            _Py_MergeZeroLocalRefcount(op);  // local count hit zero: deallocate, or give up ownership and merge
+        }
+    }
+    else {
+        _Py_DecRefShared(op);  // not the owner: decrement the shared count instead
+    }
+}
+#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op))
+
 #elif defined(Py_REF_DEBUG)
 static inline void Py_DECREF(const char *filename, int lineno, PyObject *op)
 {
index cb09a7ff438243ab675c73ff31b4d129da0eeaba..de7db70275441a3edb78101bcc6cd9fff4390f4f 100644 (file)
@@ -796,7 +796,10 @@ def check_cflags_pgo():
     return any(option in cflags_nodist for option in pgo_options)
 
 
-_header = 'nP'
+if sysconfig.get_config_var('Py_NOGIL'):
+    _header = 'PHBBInP'  # struct codes for ob_tid, _padding, ob_mutex, ob_gc_bits, ob_ref_local, ob_ref_shared, ob_type
+else:
+    _header = 'nP'  # struct codes for ob_refcnt, ob_type
 _align = '0n'
 _vheader = _header + 'n'
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-12-12-09-01.gh-issue-110481.3Er3it.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-12-12-09-01.gh-issue-110481.3Er3it.rst
new file mode 100644 (file)
index 0000000..984548a
--- /dev/null
@@ -0,0 +1 @@
+Implement biased reference counting in ``--disable-gil`` builds.
index df11c00237f9a6997ecd776d9f48bdda31f430db..12d372ff16702111f0d8660007fbecf755d8bfd4 100644 (file)
@@ -1,5 +1,11 @@
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 // gh-85283: On Windows, Py_LIMITED_API requires Py_BUILD_CORE to not attempt
 // linking the extension to python3.lib (which fails). Py_BUILD_CORE_MODULE is
index c4d1138534d8c58cdc3aa1554f94547e26ad77c1..dc3c59d01e0cb19a4fe817948c315867fbab9dbe 100644 (file)
@@ -2,8 +2,12 @@
 posixshmem - A Python extension that provides shm_open() and shm_unlink()
 */
 
+#include "pyconfig.h"   // Py_NOGIL
+
+#ifndef Py_NOGIL
 // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 #include <Python.h>
 
index 2f1b8618eb9fcc2a63396dfd0ad9bbad80a33c2c..c8e120e24b18b962a81c39f1d1464b75a6ef968a 100644 (file)
@@ -3,8 +3,14 @@
  * using the SystemConfiguration framework.
  */
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 #include <Python.h>
 #include <SystemConfiguration/SystemConfiguration.h>
index 402fbbaecf8dd8ac271de09b6ee25e34782e800a..d0035608aad0b322f601fd01a467834ebe424bb3 100644 (file)
  *
  */
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
 // Need limited C API version 3.13 for PyModule_Add() on Windows
+#ifndef Py_NOGIL
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 #include "Python.h"
 
index 53dd01d1ed4f802d3a1073021bdc44acfb466d7d..b58d26cddc723ac6bd54cefa5910323ada68579b 100644 (file)
@@ -1,4 +1,11 @@
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 #define Py_LIMITED_API 0x030c0000 // 3.12
+#endif
+
 #include "parts.h"
 #include <stddef.h>               // max_align_t
 #include <string.h>               // memset
index 3e81903098f95489a41648cba3262a2080fde94c..857cb30b3da1cac01576bc7743c75c6599ffcb17 100644 (file)
@@ -1,6 +1,13 @@
 /* Test Vectorcall in the limited API */
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 #define Py_LIMITED_API 0x030c0000 // 3.12
+#endif
+
 #include "parts.h"
 #include "clinic/vectorcall_limited.c.h"
 
index 4273383816a0dd582e967739cb35139989cb7c4f..63ebb5292602f8f879f85faf5be9119d3f11c80f 100644 (file)
@@ -4,8 +4,14 @@
 #undef Py_BUILD_CORE_MODULE
 #undef Py_BUILD_CORE_BUILTIN
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
 // For now, only limited C API 3.13 is supported
+#ifndef Py_NOGIL
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 /* Always enable assertions */
 #undef NDEBUG
index a13024d6cd029e1102f99c5f5ff792176ae3b828..99d48b6617f49ca2c4b62a5d278645457094d653 100644 (file)
@@ -4,7 +4,13 @@
  * foo, bar), only the first one is called the same as the compiled file.
  */
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 #define Py_LIMITED_API 0x03020000
+#endif
 
 #include <Python.h>
 
index b35cccb5bdc406d9c89c837e90de03a483480544..8bda1d602473a1fb701b9ba74f1b6b4012d88931 100644 (file)
@@ -3,8 +3,14 @@
  * DCE compatible Universally Unique Identifier library.
  */
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 #include "Python.h"
 #if defined(HAVE_UUID_H)
index e4fd3b47762702f9134ce37a235efea33a1fc92e..5ac4d29834325417792aa9ad6b5432e8efc1c202 100644 (file)
@@ -1,7 +1,13 @@
 /* Errno module */
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 #include "Python.h"
 #include <errno.h>                // EPIPE
index 9d412ba580c336d5dfb21352bbba4249caa25153..ff7fc09bca399666cb396307c932b7b4ff40b8ff 100644 (file)
 
 /* MD5 objects */
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 #include "Python.h"
 #include "hashlib.h"
index c973008aaa6bd43547f980a91316eba23227dbcc..eb9c2e27fdba85ca4c3beea1cb58f09323fdd89d 100644 (file)
@@ -1,5 +1,11 @@
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 #include "Python.h"
 #include <errno.h>                // errno
index df6e593b320e5227517e12b4705865edbe5a998b..3dcf74ab1190d1ab1fa0a95d42f01cc9d28bda95 100644 (file)
           pass
    */
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 #include "Python.h"
 #include <string.h>
index 361c7e76d77f5082e7f0209c09b3a14cb38fa1a3..04673ea68cfee5f92912b153edc823d5d5dc6ab1 100644 (file)
@@ -5,7 +5,13 @@
  * See the xxlimited module for an extension module template.
  */
 
+#ifndef _MSC_VER
+#include "pyconfig.h"   // Py_NOGIL
+#endif
+
+#ifndef Py_NOGIL
 #define Py_LIMITED_API 0x03050000
+#endif
 
 #include "Python.h"
 
index 791db755663e68bcdda17d82334672d23ee82294..35c7e7bf33b13508e697fbe2ea97391fd32642f0 100644 (file)
@@ -296,6 +296,124 @@ _Py_DecRef(PyObject *o)
     Py_DECREF(o);
 }
 
+#ifdef Py_NOGIL
+static inline int
+is_shared_refcnt_dead(Py_ssize_t shared)  // NOTE(review): only called under Py_REF_DEBUG below, so it is unused (possible warning) in other builds
+{
+#if SIZEOF_SIZE_T == 8
+    return shared == (Py_ssize_t)0xDDDDDDDDDDDDDDDD;  // presumably the debug allocator's fill pattern for freed memory; TODO confirm
+#else
+    return shared == (Py_ssize_t)0xDDDDDDDD;
+#endif
+}
+
+void
+_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno)  // filename/lineno are only used for the Py_REF_DEBUG report below
+{
+    // Should we queue the object for the owning thread to merge?
+    int should_queue;
+
+    Py_ssize_t new_shared;
+    Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&o->ob_ref_shared);
+    do {
+        should_queue = (shared == 0 || shared == _Py_REF_MAYBE_WEAKREF);  // shared count is zero and the flags are neither queued nor merged
+
+        if (should_queue) {
+            // If the object had refcount zero, not queued, and not merged,
+            // then we enqueue the object to be merged by the owning thread.
+            // In this case, we don't subtract one from the reference count
+            // because the queue holds a reference.
+            new_shared = _Py_REF_QUEUED;
+        }
+        else {
+            // Otherwise, subtract one from the reference count. This might
+            // be negative!
+            new_shared = shared - (1 << _Py_REF_SHARED_SHIFT);  // the shift keeps the two flag bits untouched
+        }
+
+#ifdef Py_REF_DEBUG
+        if ((_Py_REF_IS_MERGED(new_shared) && new_shared < 0) ||
+            is_shared_refcnt_dead(shared))
+        {
+            _Py_NegativeRefcount(filename, lineno, o);  // negative count after merging, or a field matching the "dead" pattern
+        }
+#endif
+    } while (!_Py_atomic_compare_exchange_ssize(&o->ob_ref_shared,
+                                                &shared, new_shared));  // retry on contention; `shared` is passed by address and presumably refreshed on failure
+
+    if (should_queue) {
+        // TODO: the inter-thread queue is not yet implemented. For now,
+        // we just merge the refcount here.
+        Py_ssize_t refcount = _Py_ExplicitMergeRefcount(o, -1);  // the -1 applies the decrement that was skipped above
+        if (refcount == 0) {
+            _Py_Dealloc(o);
+        }
+    }
+    else if (new_shared == _Py_REF_MERGED) {
+        // refcount is zero AND merged
+        _Py_Dealloc(o);
+    }
+}
+
+void
+_Py_DecRefShared(PyObject *o)
+{
+    _Py_DecRefSharedDebug(o, NULL, 0);  // same logic as the debug variant, but with no call-site information
+}
+
+void
+_Py_MergeZeroLocalRefcount(PyObject *op)
+{
+    assert(op->ob_ref_local == 0);  // precondition: the local count has already reached zero
+
+    _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0);  // give up ownership (zero is _Py_UNOWNED_TID)
+    Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared);
+    if (shared == 0) {
+        // Fast-path: shared refcount is zero (including flags)
+        _Py_Dealloc(op);
+        return;
+    }
+
+    // Slow-path: atomically set the flags (low two bits) to _Py_REF_MERGED.
+    Py_ssize_t new_shared;
+    do {
+        new_shared = (shared & ~_Py_REF_SHARED_FLAG_MASK) | _Py_REF_MERGED;  // keep the count bits, replace the flag bits
+    } while (!_Py_atomic_compare_exchange_ssize(&op->ob_ref_shared,
+                                                &shared, new_shared));  // retry on contention; `shared` is passed by address and presumably refreshed on failure
+
+    if (new_shared == _Py_REF_MERGED) {
+        // i.e., the shared refcount is zero (only the flags are set) so we
+        // deallocate the object.
+        _Py_Dealloc(op);
+    }
+}
+
+Py_ssize_t
+_Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra)  // returns the merged refcount (local + shared + extra)
+{
+    assert(!_Py_IsImmortal(op));
+    Py_ssize_t refcnt;
+    Py_ssize_t new_shared;
+    Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared);
+    do {
+        refcnt = Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT);  // shared count with the flag bits shifted out
+        if (_Py_REF_IS_MERGED(shared)) {
+            return refcnt;  // already merged: report the shared count; `extra` is not applied
+        }
+
+        refcnt += (Py_ssize_t)op->ob_ref_local;
+        refcnt += extra;
+
+        new_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED);  // combined count stored in the shared field, flagged as merged
+    } while (!_Py_atomic_compare_exchange_ssize(&op->ob_ref_shared,
+                                                &shared, new_shared));  // retry on contention; `shared` is passed by address and presumably refreshed on failure
+
+    _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0);  // the local count now lives in the shared field
+    _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0);  // the object is no longer owned by any thread
+    return refcnt;
+}
+#endif
+
 
 /**************************************/
 
@@ -1926,10 +2044,7 @@ PyTypeObject _PyNone_Type = {
     none_new,           /*tp_new */
 };
 
-PyObject _Py_NoneStruct = {
-    { _Py_IMMORTAL_REFCNT },
-    &_PyNone_Type
-};
+PyObject _Py_NoneStruct = _PyObject_HEAD_INIT(&_PyNone_Type);
 
 /* NotImplemented is an object that can be used to signal that an
    operation is not implemented for the given type combination. */
@@ -2028,10 +2143,7 @@ PyTypeObject _PyNotImplemented_Type = {
     notimplemented_new, /*tp_new */
 };
 
-PyObject _Py_NotImplementedStruct = {
-    { _Py_IMMORTAL_REFCNT },
-    &_PyNotImplemented_Type
-};
+PyObject _Py_NotImplementedStruct = _PyObject_HEAD_INIT(&_PyNotImplemented_Type);
 
 
 PyStatus
@@ -2248,7 +2360,16 @@ new_reference(PyObject *op)
         _PyTraceMalloc_NewReference(op);
     }
     // Skip the immortal object check in Py_SET_REFCNT; always set refcnt to 1
+#if !defined(Py_NOGIL)
     op->ob_refcnt = 1;
+#else
+    op->ob_tid = _Py_ThreadId();
+    op->_padding = 0;
+    op->ob_mutex = 0;
+    op->ob_gc_bits = 0;
+    op->ob_ref_local = 1;
+    op->ob_ref_shared = 0;
+#endif
 #ifdef Py_TRACE_REFS
     _Py_AddToAllObjects(op);
 #endif
index 2a3514f2028c3d8b6cf58eeb3fe41a9a81db1499..76449c6a2a9ee749678eef08a2443a14453741c9 100644 (file)
@@ -2394,7 +2394,4 @@ static PyTypeObject _PySetDummy_Type = {
     Py_TPFLAGS_DEFAULT, /*tp_flags */
 };
 
-static PyObject _dummy_struct = {
-    { _Py_IMMORTAL_REFCNT },
-    &_PySetDummy_Type
-};
+static PyObject _dummy_struct = _PyObject_HEAD_INIT(&_PySetDummy_Type);
index 1513f3e49abb44dbc9081d43c4669d632a9699ab..a3ed0c096d84ed9db96ae4b03a4d7f92d7a7188e 100644 (file)
@@ -98,10 +98,7 @@ PyTypeObject PyEllipsis_Type = {
     ellipsis_new,                       /* tp_new */
 };
 
-PyObject _Py_EllipsisObject = {
-    { _Py_IMMORTAL_REFCNT },
-    &PyEllipsis_Type
-};
+PyObject _Py_EllipsisObject = _PyObject_HEAD_INIT(&PyEllipsis_Type);
 
 
 /* Slice object implementation */
index 80b19567c63d2086c9271521791edc45b95db0ab..87636efcfca0503dea153f0054782bfbb805f168 100644 (file)
@@ -14967,7 +14967,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
             // Skip the Immortal Instance check and restore
             // the two references (key and value) ignored
             // by PyUnicode_InternInPlace().
-            s->ob_refcnt = 2;
+            _Py_SetMortal(s, 2);
 #ifdef INTERNED_STATS
             total_length += PyUnicode_GET_LENGTH(s);
 #endif
index ae36936703b0c3cd47634fab827292f0a04a5368..36e4d07420e650bd337b2fef902ccea9e947c1e4 100644 (file)
    winsound.PlaySound(None, 0)
 */
 
+#ifndef Py_NOGIL
 // Need limited C API version 3.13 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED
 #define Py_LIMITED_API 0x030d0000
+#endif
 
 #include <Python.h>
 #include <windows.h>
index e3a7c5f38403a78d4358f458845357a788a9c349..6f8584c15b7bde8f2db93ae74a5fe725b5d05fb8 100644 (file)
 #  error "ceval.c must be build with Py_BUILD_CORE define for best performance"
 #endif
 
-#if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS)
+#if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_NOGIL)
 // GH-89279: The MSVC compiler does not inline these static inline functions
 // in PGO build in _PyEval_EvalFrameDefault(), because this function is over
 // the limit of PGO, and that limit cannot be configured.
 // Define them as macros to make sure that they are always inlined by the
 // preprocessor.
+// TODO: implement Py_DECREF macro for Py_NOGIL
 
 #undef Py_DECREF
 #define Py_DECREF(arg) \
index 5fd65d53c503f8d199f59017c978d793e92a5336..9ee11588e448aea9c48490570b855d4f7679398b 100644 (file)
 /* Uncomment this to dump debugging output when assertions fail */
 // #define INSTRUMENT_DEBUG 1
 
-PyObject _PyInstrumentation_DISABLE =
-{
-    .ob_refcnt = _Py_IMMORTAL_REFCNT,
-    .ob_type = &PyBaseObject_Type
-};
+PyObject _PyInstrumentation_DISABLE = _PyObject_HEAD_INIT(&PyBaseObject_Type);
 
-PyObject _PyInstrumentation_MISSING =
-{
-    .ob_refcnt = _Py_IMMORTAL_REFCNT,
-    .ob_type = &PyBaseObject_Type
-};
+PyObject _PyInstrumentation_MISSING = _PyObject_HEAD_INIT(&PyBaseObject_Type);
 
 static const int8_t EVENT_FOR_OPCODE[256] = {
     [RETURN_CONST] = PY_MONITORING_EVENT_PY_RETURN,
index 07fd93d29b09dce32207afff37d0d8392412e060..d74c4c58a94523cacf3be645aa59f392f2aa6ca6 100644 (file)
@@ -2525,7 +2525,7 @@ static const PyBytesObject no_location = {
 };
 
 const struct _PyCode_DEF(8) _Py_InitCleanup = {
-    _PyVarObject_HEAD_INIT(&PyCode_Type, 3)
+    _PyVarObject_HEAD_INIT(&PyCode_Type, 3),
     .co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty),
     .co_names = (PyObject *)&_Py_SINGLETON(tuple_empty),
     .co_exceptiontable = (PyObject *)&_Py_SINGLETON(bytes_empty),
index c3231a5a40c32636a4bd931458a14a6c1a61994e..fed7cf3ac6d3963804cce99f844301dd1626fd91 100644 (file)
@@ -115,6 +115,7 @@ class Printer:
         self.inits: list[str] = []
         self.identifiers, self.strings = self.get_identifiers_and_strings()
         self.write('#include "Python.h"')
+        self.write('#include "internal/pycore_object.h"')
         self.write('#include "internal/pycore_gc.h"')
         self.write('#include "internal/pycore_code.h"')
         self.write('#include "internal/pycore_frame.h"')
@@ -154,14 +155,10 @@ class Printer:
         self.write("}" + suffix)
 
     def object_head(self, typename: str) -> None:
-        with self.block(".ob_base =", ","):
-            self.write(f".ob_refcnt = _Py_IMMORTAL_REFCNT,")
-            self.write(f".ob_type = &{typename},")
+        self.write(f".ob_base = _PyObject_HEAD_INIT(&{typename}),")
 
     def object_var_head(self, typename: str, size: int) -> None:
-        with self.block(".ob_base =", ","):
-            self.object_head(typename)
-            self.write(f".ob_size = {size},")
+        self.write(f".ob_base = _PyVarObject_HEAD_INIT(&{typename}, {size}),")
 
     def field(self, obj: object, name: str) -> None:
         self.write(f".{name} = {getattr(obj, name)},")