git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-121368: Fix seq lock memory ordering in _PyType_Lookup (#121388)
authorSam Gross <colesbury@gmail.com>
Mon, 8 Jul 2024 18:52:07 +0000 (14:52 -0400)
committerGitHub <noreply@github.com>
Mon, 8 Jul 2024 18:52:07 +0000 (14:52 -0400)
The `_PySeqLock_EndRead` function needs an acquire fence to ensure that
the load of the sequence happens after any loads within the read side
critical section. The missing fence can trigger bugs on macOS arm64.

Additionally, we need a release fence in `_PySeqLock_LockWrite` to
ensure that the sequence update is visible before any modifications to
the cache entry.

Include/cpython/pyatomic.h
Include/cpython/pyatomic_gcc.h
Include/cpython/pyatomic_msc.h
Include/cpython/pyatomic_std.h
Include/internal/pycore_lock.h
Misc/NEWS.d/next/Core and Builtins/2024-07-04-23-38-30.gh-issue-121368.m3EF9E.rst [new file with mode: 0644]
Modules/_testcapi/pyatomic.c
Objects/typeobject.c
Python/lock.c

index 55a139bb9158db993832b6f0e5375c1d7925a4dc..4ecef4f56edf420ebb3c28cc1d597977a0ac727e 100644 (file)
@@ -510,6 +510,9 @@ _Py_atomic_load_ssize_acquire(const Py_ssize_t *obj);
 // See https://en.cppreference.com/w/cpp/atomic/atomic_thread_fence
 static inline void _Py_atomic_fence_seq_cst(void);
 
+// Acquire fence
+static inline void _Py_atomic_fence_acquire(void);
+
 // Release fence
 static inline void _Py_atomic_fence_release(void);
 
index f2ebdeeb5524e47d8444d0777ea2d7fedba9b7a1..ef09954d53ac1d61819856ed5c59a1fca1f1da28 100644 (file)
@@ -542,6 +542,10 @@ static inline void
 _Py_atomic_fence_seq_cst(void)
 { __atomic_thread_fence(__ATOMIC_SEQ_CST); }
 
+ static inline void
+_Py_atomic_fence_acquire(void)
+{ __atomic_thread_fence(__ATOMIC_ACQUIRE); }
+
  static inline void
 _Py_atomic_fence_release(void)
 { __atomic_thread_fence(__ATOMIC_RELEASE); }
index f32995c1f578ac19414c0d1dfd9e22c1ae2183c9..84da21bdcbff4f1ed3bb549db4dfd7350b5dfffe 100644 (file)
@@ -1066,6 +1066,18 @@ _Py_atomic_fence_seq_cst(void)
 #else
 #  error "no implementation of _Py_atomic_fence_seq_cst"
 #endif
+}
+
+ static inline void
+_Py_atomic_fence_acquire(void)
+{
+#if defined(_M_ARM64)
+    __dmb(_ARM64_BARRIER_ISHLD);
+#elif defined(_M_X64) || defined(_M_IX86)
+    _ReadBarrier();
+#else
+#  error "no implementation of _Py_atomic_fence_acquire"
+#endif
 }
 
  static inline void
index 0cdce4e6dd39f0f6a472e847ac6e1184d32b8165..7c71e94c68f8e6d16664201bd71f8538c10d361d 100644 (file)
@@ -961,6 +961,13 @@ _Py_atomic_fence_seq_cst(void)
     atomic_thread_fence(memory_order_seq_cst);
 }
 
+ static inline void
+_Py_atomic_fence_acquire(void)
+{
+    _Py_USING_STD;
+    atomic_thread_fence(memory_order_acquire);
+}
+
  static inline void
 _Py_atomic_fence_release(void)
 {
index 3824434f3f375d300761ba9d60a7ef404c7b4735..e6da083b807ce5b271fc578e092c3ebf0fa46912 100644 (file)
@@ -228,12 +228,12 @@ PyAPI_FUNC(void) _PySeqLock_AbandonWrite(_PySeqLock *seqlock);
 PyAPI_FUNC(uint32_t) _PySeqLock_BeginRead(_PySeqLock *seqlock);
 
 // End the read operation and confirm that the sequence number has not changed.
-// Returns 1 if the read was successful or 0 if the read should be re-tried.
-PyAPI_FUNC(uint32_t) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
+// Returns 1 if the read was successful or 0 if the read should be retried.
+PyAPI_FUNC(int) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
 
 // Check if the lock was held during a fork and clear the lock.  Returns 1
-// if the lock was held and any associated datat should be cleared.
-PyAPI_FUNC(uint32_t) _PySeqLock_AfterFork(_PySeqLock *seqlock);
+// if the lock was held and any associated data should be cleared.
+PyAPI_FUNC(int) _PySeqLock_AfterFork(_PySeqLock *seqlock);
 
 #ifdef __cplusplus
 }
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-07-04-23-38-30.gh-issue-121368.m3EF9E.rst b/Misc/NEWS.d/next/Core and Builtins/2024-07-04-23-38-30.gh-issue-121368.m3EF9E.rst
new file mode 100644 (file)
index 0000000..3df5b21
--- /dev/null
@@ -0,0 +1,3 @@
+Fix race condition in ``_PyType_Lookup`` in the free-threaded build due to
+a missing memory fence.  This could lead to ``_PyType_Lookup`` returning
+incorrect results on arm64.
index 4f72844535ebd645d8a7692f9c6fdcf67637c472..850de6f9c3366b134df5bf15e632137cd5cd9dd5 100644 (file)
@@ -125,6 +125,7 @@ test_atomic_fences(PyObject *self, PyObject *obj) {
     // Just make sure that the fences compile. We are not
     // testing any synchronizing ordering.
     _Py_atomic_fence_seq_cst();
+    _Py_atomic_fence_acquire();
     _Py_atomic_fence_release();
     Py_RETURN_NONE;
 }
index 447e561c0d4440f71957b2c2ae890b0b8a976da8..df895bc65983c0c41ad4300ebeda2ced1509ad04 100644 (file)
@@ -5387,7 +5387,7 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name)
 #ifdef Py_GIL_DISABLED
     // synchronize-with other writing threads by doing an acquire load on the sequence
     while (1) {
-        int sequence = _PySeqLock_BeginRead(&entry->sequence);
+        uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
         uint32_t entry_version = _Py_atomic_load_uint32_relaxed(&entry->version);
         uint32_t type_version = _Py_atomic_load_uint32_acquire(&type->tp_version_tag);
         if (entry_version == type_version &&
index 7c6a5175e88ff1108b6377b3d5a42727e390dce3..57675fe1873fa284dd3a051bc6be424d45a269b5 100644 (file)
@@ -514,6 +514,7 @@ void _PySeqLock_LockWrite(_PySeqLock *seqlock)
         }
         else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) {
             // We've locked the cache
+            _Py_atomic_fence_release();
             break;
         }
         else {
@@ -547,28 +548,31 @@ uint32_t _PySeqLock_BeginRead(_PySeqLock *seqlock)
     return sequence;
 }
 
-uint32_t _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
+int _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
 {
-    // Synchronize again and validate that the entry hasn't been updated
-    // while we were readying the values.
-     if (_Py_atomic_load_uint32_acquire(&seqlock->sequence) == previous) {
+    // gh-121368: We need an explicit acquire fence here to ensure that
+    // this load of the sequence number is not reordered before any loads
+    // within the read lock.
+    _Py_atomic_fence_acquire();
+
+    if (_Py_atomic_load_uint32_relaxed(&seqlock->sequence) == previous) {
         return 1;
-     }
+    }
 
-     _Py_yield();
-     return 0;
+    _Py_yield();
+    return 0;
 }
 
-uint32_t _PySeqLock_AfterFork(_PySeqLock *seqlock)
+int _PySeqLock_AfterFork(_PySeqLock *seqlock)
 {
     // Synchronize again and validate that the entry hasn't been updated
     // while we were readying the values.
-     if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
+    if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
         seqlock->sequence = 0;
         return 1;
-     }
+    }
 
-     return 0;
+    return 0;
 }
 
 #undef PyMutex_Lock