// See https://en.cppreference.com/w/cpp/atomic/atomic_thread_fence
static inline void _Py_atomic_fence_seq_cst(void);
+// Acquire fence
+static inline void _Py_atomic_fence_acquire(void);
+
// Release fence
static inline void _Py_atomic_fence_release(void);
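These three wrappers mirror C11 `atomic_thread_fence`. For orientation, a minimal self-contained C11 sketch of the pairing they enable (illustrative only, not part of this patch; `payload` and `ready` are invented names):

#include <stdatomic.h>

static int payload;       /* plain data published through `ready` */
static atomic_int ready;  /* flag accessed with relaxed operations */

void writer(void)
{
    payload = 42;
    /* Release fence: nothing above may be reordered past a later store. */
    atomic_thread_fence(memory_order_release);
    atomic_store_explicit(&ready, 1, memory_order_relaxed);
}

int reader(void)
{
    if (atomic_load_explicit(&ready, memory_order_relaxed)) {
        /* Acquire fence: nothing below may be reordered above the
           flag load that precedes the fence. */
        atomic_thread_fence(memory_order_acquire);
        return payload;   /* guaranteed to observe 42 */
    }
    return -1;            /* flag not published yet */
}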
_Py_atomic_fence_seq_cst(void)
{ __atomic_thread_fence(__ATOMIC_SEQ_CST); }
+static inline void
+_Py_atomic_fence_acquire(void)
+{ __atomic_thread_fence(__ATOMIC_ACQUIRE); }
+
static inline void
_Py_atomic_fence_release(void)
{ __atomic_thread_fence(__ATOMIC_RELEASE); }
#else
# error "no implementation of _Py_atomic_fence_seq_cst"
#endif
}
+static inline void
+_Py_atomic_fence_acquire(void)
+{
+#if defined(_M_ARM64)
+ __dmb(_ARM64_BARRIER_ISHLD);
+#elif defined(_M_X64) || defined(_M_IX86)
+ _ReadBarrier();
+#else
+# error "no implementation of _Py_atomic_fence_acquire"
+#endif
+}
+
static inline void
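A note on the MSVC lowering above (my summary, not text from the patch): `__dmb(_ARM64_BARRIER_ISHLD)` emits `dmb ishld`, which orders prior loads against later loads and stores, i.e. exactly an acquire fence; on x86/x64 the hardware never reorders load-load or load-store, so only the compiler must be restrained, and `_ReadBarrier()` emits no instruction at all.

/* Approximate lowering of _Py_atomic_fence_acquire() (informational):
 *
 *   target          instruction   why it suffices
 *   -------------   -----------   --------------------------------------
 *   arm64 (MSVC)    dmb ishld     hardware load->load/store barrier
 *   x86 / x64       (none)        TSO already orders loads; _ReadBarrier()
 *                                 is a compiler-only barrier
 *   GCC / Clang     per target    __atomic_thread_fence(__ATOMIC_ACQUIRE)
 */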
atomic_thread_fence(memory_order_seq_cst);
}
+static inline void
+_Py_atomic_fence_acquire(void)
+{
+ _Py_USING_STD;
+ atomic_thread_fence(memory_order_acquire);
+}
+
static inline void
_Py_atomic_fence_release(void)
{
PyAPI_FUNC(uint32_t) _PySeqLock_BeginRead(_PySeqLock *seqlock);
// End the read operation and confirm that the sequence number has not changed.
-// Returns 1 if the read was successful or 0 if the read should be re-tried.
-PyAPI_FUNC(uint32_t) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
+// Returns 1 if the read was successful or 0 if the read should be retried.
+PyAPI_FUNC(int) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
// Check if the lock was held during a fork and clear the lock. Returns 1
-// if the lock was held and any associated datat should be cleared.
-PyAPI_FUNC(uint32_t) _PySeqLock_AfterFork(_PySeqLock *seqlock);
+// if the lock was held and any associated data should be cleared.
+PyAPI_FUNC(int) _PySeqLock_AfterFork(_PySeqLock *seqlock);
#ifdef __cplusplus
}
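For context on how the three seqlock calls compose, a hedged sketch of the reader protocol (assuming the internal pycore_lock.h/pyatomic.h headers; the `cache_entry` struct and its field are invented, the real caller is in Objects/typeobject.c):

struct cache_entry {
    _PySeqLock sequence;
    uint32_t version;   /* protected field (illustrative) */
};

static uint32_t
read_version(struct cache_entry *entry)
{
    uint32_t v, seq;
    do {
        seq = _PySeqLock_BeginRead(&entry->sequence);
        v = _Py_atomic_load_uint32_relaxed(&entry->version);
        /* EndRead returns 0 if a writer raced with this read; retry. */
    } while (!_PySeqLock_EndRead(&entry->sequence, seq));
    return v;
}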
--- /dev/null
+Fix race condition in ``_PyType_Lookup`` in the free-threaded build due to
+a missing memory fence. This could lead to ``_PyType_Lookup`` returning
+incorrect results on arm64.
// Just make sure that the fences compile. We are not
// testing any synchronizing ordering.
_Py_atomic_fence_seq_cst();
+ _Py_atomic_fence_acquire();
_Py_atomic_fence_release();
Py_RETURN_NONE;
}
#ifdef Py_GIL_DISABLED
// synchronize-with other writing threads by doing an acquire load on the sequence
while (1) {
- int sequence = _PySeqLock_BeginRead(&entry->sequence);
+ uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
uint32_t entry_version = _Py_atomic_load_uint32_relaxed(&entry->version);
uint32_t type_version = _Py_atomic_load_uint32_acquire(&type->tp_version_tag);
if (entry_version == type_version &&
}
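The `int` to `uint32_t` change keeps the local consistent with what `_PySeqLock_BeginRead` returns. The fence bug itself is in the validation that follows these loads: an acquire load of the sequence only keeps *later* accesses from moving above it; it does not pin the *earlier* relaxed loads of the entry, so on arm64 the CPU may satisfy them after the re-check. A schematic of the bad interleaving (my reconstruction; `entry->value` is illustrative):

/* Reader (pre-patch)                      Writer
 *
 * seq = _PySeqLock_BeginRead(...)
 * v   = relaxed load of entry->value -.   _PySeqLock_LockWrite(): seq -> odd
 *                                     |   entry->value = new;
 *                                     |   _PySeqLock_UnlockWrite(): seq -> even
 * acquire load of sequence == seq: OK |
 *                                     '-> load of v actually performed here
 *
 * The re-check passes (its load was not ordered after the data loads),
 * yet `v` holds a value from the middle of the writer's update.
 */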
else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) {
// We've locked the cache
+ _Py_atomic_fence_release();
break;
}
else {
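On the writer side, the release fence added above sits between the CAS that moves the sequence to an odd value and the caller's subsequent stores to the protected entry; in effect it is a store-store barrier, so no reader can observe updated entry fields while still seeing the old, even sequence number. A condensed sketch of the whole locking loop after this change (reconstructed from the hunk's context; the actual function lives in Python/lock.c and may differ in detail):

void
_PySeqLock_LockWrite(_PySeqLock *seqlock)
{
    /* Lock by moving the sequence to an odd number. */
    uint32_t prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
    while (1) {
        if (SEQLOCK_IS_UPDATING(prev)) {
            /* Another writer holds the lock: wait and reload. */
            _Py_yield();
            prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence);
        }
        else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence,
                                                    &prev, prev + 1)) {
            /* Locked: the fence keeps the odd-sequence store ahead of
               the data writes that follow. */
            _Py_atomic_fence_release();
            break;
        }
        else {
            /* The CAS lost a race with another writer: retry. */
            _Py_yield();
        }
    }
}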
return sequence;
}
-uint32_t _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
+int _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous)
{
- // Synchronize again and validate that the entry hasn't been updated
- // while we were readying the values.
- if (_Py_atomic_load_uint32_acquire(&seqlock->sequence) == previous) {
+ // gh-121368: We need an explicit acquire fence here to ensure that
+ // this load of the sequence number is not reordered before any loads
+ // within the read lock.
+ _Py_atomic_fence_acquire();
+
+ if (_Py_atomic_load_uint32_relaxed(&seqlock->sequence) == previous) {
return 1;
- }
+ }
- _Py_yield();
- return 0;
+ _Py_yield();
+ return 0;
}
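The subtle point in the new `_PySeqLock_EndRead`: an acquire *load* orders only the accesses that come after it, while the seqlock argument needs the data loads that came *before* it to stay ahead of the re-check. An acquire *fence* followed by a relaxed load supplies exactly that load-to-load ordering. The same contrast in portable C11 (illustrative; `seq` stands in for `seqlock->sequence`):

#include <stdatomic.h>
#include <stdint.h>

extern atomic_uint_fast32_t seq;   /* stand-in for seqlock->sequence */

int end_read_old(uint32_t previous)   /* buggy on weakly ordered CPUs */
{
    /* Earlier data loads may still be reordered past this acquire load. */
    return atomic_load_explicit(&seq, memory_order_acquire) == previous;
}

int end_read_new(uint32_t previous)
{
    /* The fence keeps every preceding load ahead of the re-check. */
    atomic_thread_fence(memory_order_acquire);
    return atomic_load_explicit(&seq, memory_order_relaxed) == previous;
}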
-uint32_t _PySeqLock_AfterFork(_PySeqLock *seqlock)
+int _PySeqLock_AfterFork(_PySeqLock *seqlock)
{
-// Synchronize again and validate that the entry hasn't been updated
-// while we were readying the values.
+// Check whether the lock was held at the time of the fork.
- if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
+ if (SEQLOCK_IS_UPDATING(seqlock->sequence)) {
seqlock->sequence = 0;
return 1;
- }
+ }
- return 0;
+ return 0;
}
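`_PySeqLock_AfterFork` deliberately uses plain, non-atomic accesses: after `fork()` the child has exactly one thread, so no ordering is needed, and a sequence stuck in the updating state means its writer no longer exists. A hedged sketch of a post-fork sweep using it (reusing the invented `cache_entry` from the earlier sketch):

/* Runs in the child while it is still single-threaded. */
static void
cache_after_fork(struct cache_entry *entries, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        if (_PySeqLock_AfterFork(&entries[i].sequence)) {
            /* A writer held the lock across fork(); its update may be
               half-applied, so discard the entry's contents. */
            entries[i].version = 0;
        }
    }
}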
#undef PyMutex_Lock