// See https://en.cppreference.com/w/cpp/atomic/atomic_thread_fence
static inline void _Py_atomic_fence_seq_cst(void);
+// Acquire fence
+static inline void _Py_atomic_fence_acquire(void);
+
// Release fence
static inline void _Py_atomic_fence_release(void);
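These three wrappers mirror C11 `atomic_thread_fence`. For orientation, a minimal self-contained C11 sketch of the pairing they enable (illustrative only, not part of this patch; `payload` and `ready` are invented names):

#include <stdatomic.h>

static int payload;       /* plain data published through `ready` */
static atomic_int ready;  /* flag accessed with relaxed operations */

void writer(void)
{
    payload = 42;
    /* Release fence: nothing above may be reordered past a later store. */
    atomic_thread_fence(memory_order_release);
    atomic_store_explicit(&ready, 1, memory_order_relaxed);
}

int reader(void)
{
    if (atomic_load_explicit(&ready, memory_order_relaxed)) {
        /* Acquire fence: nothing below may be reordered above the
           flag load that precedes the fence. */
        atomic_thread_fence(memory_order_acquire);
        return payload;   /* guaranteed to observe 42 */
    }
    return -1;            /* flag not published yet */
}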
_Py_atomic_fence_seq_cst(void)
{ __atomic_thread_fence(__ATOMIC_SEQ_CST); }
+static inline void
+_Py_atomic_fence_acquire(void)
+{ __atomic_thread_fence(__ATOMIC_ACQUIRE); }
+
static inline void
_Py_atomic_fence_release(void)
{ __atomic_thread_fence(__ATOMIC_RELEASE); }
#else
# error "no implementation of _Py_atomic_fence_seq_cst"
#endif
}
+static inline void
+_Py_atomic_fence_acquire(void)
+{
+#if defined(_M_ARM64)
+ __dmb(_ARM64_BARRIER_ISHLD);
+#elif defined(_M_X64) || defined(_M_IX86)
+ _ReadBarrier();
+#else
+# error "no implementation of _Py_atomic_fence_acquire"
+#endif
+}
+
static inline void
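A note on the MSVC lowering above (my summary, not text from the patch): `__dmb(_ARM64_BARRIER_ISHLD)` emits `dmb ishld`, which orders prior loads against later loads and stores, i.e. exactly an acquire fence; on x86/x64 the hardware never reorders load-load or load-store, so only the compiler must be restrained, and `_ReadBarrier()` emits no instruction at all.

/* Approximate lowering of _Py_atomic_fence_acquire() (informational):
 *
 *   target          instruction   why it suffices
 *   -------------   -----------   --------------------------------------
 *   arm64 (MSVC)    dmb ishld     hardware load->load/store barrier
 *   x86 / x64       (none)        TSO already orders loads; _ReadBarrier()
 *                                 is a compiler-only barrier
 *   GCC / Clang     per target    __atomic_thread_fence(__ATOMIC_ACQUIRE)
 */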
atomic_thread_fence(memory_order_seq_cst);
}
+static inline void
+_Py_atomic_fence_acquire(void)
+{
+ _Py_USING_STD;
+ atomic_thread_fence(memory_order_acquire);
+}
+
static inline void
_Py_atomic_fence_release(void)
{
PyAPI_FUNC(uint32_t) _PySeqLock_BeginRead(_PySeqLock *seqlock);
// End the read operation and confirm that the sequence number has not changed.
-// Returns 1 if the read was successful or 0 if the read should be re-tried.
-PyAPI_FUNC(uint32_t) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
+// Returns 1 if the read was successful or 0 if the read should be retried.
+PyAPI_FUNC(int) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
// Check if the lock was held during a fork and clear the lock. Returns 1
-// if the lock was held and any associated datat should be cleared.
-PyAPI_FUNC(uint32_t) _PySeqLock_AfterFork(_PySeqLock *seqlock);
+// if the lock was held and any associated data should be cleared.
+PyAPI_FUNC(int) _PySeqLock_AfterFork(_PySeqLock *seqlock);
#ifdef __cplusplus
}
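For context on how the three seqlock calls compose, a hedged sketch of the reader protocol (assuming the internal pycore_lock.h/pyatomic.h headers; the `cache_entry` struct and its field are invented, the real caller is in Objects/typeobject.c):

struct cache_entry {
    _PySeqLock sequence;
    uint32_t version;   /* protected field (illustrative) */
};

static uint32_t
read_version(struct cache_entry *entry)
{
    uint32_t v, seq;
    do {
        seq = _PySeqLock_BeginRead(&entry->sequence);
        v = _Py_atomic_load_uint32_relaxed(&entry->version);
        /* EndRead returns 0 if a writer raced with this read; retry. */
    } while (!_PySeqLock_EndRead(&entry->sequence, seq));
    return v;
}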
--- /dev/null
+Fix race condition in ``_PyType_Lookup`` in the free-threaded build due to
+a missing memory fence. This could lead to ``_PyType_Lookup`` returning
+incorrect results on arm64.
// Just make sure that the fences compile. We are not
// testing any synchronizing ordering.
_Py_atomic_fence_seq_cst();
+ _Py_atomic_fence_acquire();
_Py_atomic_fence_release();
Py_RETURN_NONE;
}
#ifdef Py_GIL_DISABLED
// synchronize-with other writing threads by doing an acquire load on the sequence
while (1) {
- int sequence = _PySeqLock_BeginRead(&entry->sequence);
+ uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
uint32_t entry_version = _Py_atomic_load_uint32_relaxed(&entry->version);
uint32_t type_version = _Py_atomic_load_uint32_acquire(&type->tp_version_tag);
if (entry_version == type_version &&
}
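The `int` to `uint32_t` change keeps the local consistent with what `_PySeqLock_BeginRead` returns. The fence bug itself is in the validation that follows these loads: an acquire load of the sequence only keeps *later* accesses from moving above it; it does not pin the *earlier* relaxed loads of the entry, so on arm64 the CPU may satisfy them after the re-check. A schematic of the bad interleaving (my reconstruction; `entry->value` is illustrative):

/* Reader (pre-patch)                      Writer
 *
 * seq = _PySeqLock_BeginRead(...)
 * v   = relaxed load of entry->value -.   _PySeqLock_LockWrite(): seq -> odd
 *                                     |   entry->value = new;
 *                                     |   _PySeqLock_UnlockWrite(): seq -> even
 * acquire load of sequence == seq: OK |
 *                                     '-> load of v actually performed here
 *
 * The re-check passes (its load was not ordered after the data loads),
 * yet `v` holds a value from the middle of the writer's update.
 */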
else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) {
// We've locked the cache
+ _Py_atomic_fence_release();
break;
}
else {
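On the writer side, the release fence added above sits between the CAS that moves the sequence to an odd value and the caller's subsequent stores to the protected entry; in effect it is a store-store barrier, so no reader can observe updated entry fields while still seeing the old, even sequence number. A condensed sketch of the whole locking loop after this change (reconstructed from the hunk's context; the actual function lives in Python/lock.c and may differ in detail):

void
_PySeqLock_LockWrite(_PySeqLock *seqlock)
{
    /* Lock by moving the sequence to an odd number. */
    uint32_t prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
    while (1) {
        if (SEQLOCK_IS_UPDATING(prev)) {
            /* Another writer holds the lock: wait and reload. */
            _Py_yield();
            prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
        }
        else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence,
                                                    &prev, prev + 1)) {
            /* Locked: the fence keeps the odd-sequence store ahead of
               the data writes that follow. */
            _Py_atomic_fence_release();
            break;
        }
        else {
            /* The CAS lost a race with another writer: retry. */
            _Py_yield();
        }
    }
}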
return sequence;
}
-uint32_t _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
+int _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
{
- // Synchronize again and validate that the entry hasn't been updated
- // while we were readying the values.
- if (_Py_atomic_load_uint32_acquire(&seqlock->sequence) == previous) {
+ // gh-121368: We need an explicit acquire fence here to ensure that
+ // this load of the sequence number is not reordered before any loads
+ // within the read lock.
+ _Py_atomic_fence_acquire();
+
+ if (_Py_atomic_load_uint32_relaxed(&seqlock->sequence) == previous) {
return 1;
- }
+ }
- _Py_yield();
- return 0;
+ _Py_yield();
+ return 0;
}
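The subtle point in the new `_PySeqLock_EndRead`: an acquire *load* orders only the accesses that come after it, while the seqlock argument needs the data loads that came *before* it to stay ahead of the re-check. An acquire *fence* followed by a relaxed load supplies exactly that load-to-load ordering. The same contrast in portable C11 (illustrative; `seq` stands in for `seqlock->sequence`):

#include <stdatomic.h>
#include <stdint.h>

extern atomic_uint_fast32_t seq;   /* stand-in for seqlock->sequence */

int end_read_old(uint32_t previous)   /* buggy on weakly ordered CPUs */
{
    /* Earlier data loads may still be reordered past this acquire load. */
    return atomic_load_explicit(&seq, memory_order_acquire) == previous;
}

int end_read_new(uint32_t previous)
{
    /* The fence keeps every preceding load ahead of the re-check. */
    atomic_thread_fence(memory_order_acquire);
    return atomic_load_explicit(&seq, memory_order_relaxed) == previous;
}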
-uint32_t _PySeqLock_AfterFork(_PySeqLock *seqlock)
+int _PySeqLock_AfterFork(_PySeqLock *seqlock)
{
-// Synchronize again and validate that the entry hasn't been updated
-// while we were readying the values.
+// Check whether the lock was held at the time of the fork.
- if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
+ if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
seqlock->sequence = 0;
return 1;
- }
+ }
- return 0;
+ return 0;
}
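`_PySeqLock_AfterFork` deliberately uses plain, non-atomic accesses: after `fork()` the child has exactly one thread, so no ordering is needed, and a sequence stuck in the updating state means its writer no longer exists. A hedged sketch of a post-fork sweep using it (reusing the invented `cache_entry` from the earlier sketch):

/* Runs in the child while it is still single-threaded. */
static void
cache_after_fork(struct cache_entry *entries, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        if (_PySeqLock_AfterFork(&entries[i].sequence)) {
            /* A writer held the lock across fork(); its update may be
               half-applied, so discard the entry's contents. */
            entries[i].version = 0;
        }
    }
}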
#undef PyMutex_Lock