gh-132657: Add lock-free set contains implementation (#132290)
author Neil Schemenauer <nas-github@arctrix.com>
Sat, 13 Dec 2025 09:50:23 +0000 (01:50 -0800)
committer GitHub <noreply@github.com>
Sat, 13 Dec 2025 09:50:23 +0000 (09:50 +0000)
This roughly follows what was done for dictobject to add a lock-free
lookup operation. With this change, the set contains operation scales much
better when used from multiple threads. Frozenset contains performance
appears unchanged (it was already lock-free).
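
A rough way to observe the scaling is an embedding micro-benchmark along
the following lines. This is not part of the change: the file name, thread
count, and loop count are invented for illustration, and it only shows the
kind of read-mostly workload that benefits. Build it with the usual
python3-config --embed compile/link flags plus -lpthread and time it on the
default vs. free-threaded builds.

    /* set_contains_bench.c - hypothetical benchmark, not part of this commit.
     * Several native threads repeatedly test membership in one shared set. */
    #include <Python.h>
    #include <pthread.h>

    #define NTHREADS 4
    #define NLOOPS   1000000

    static PyObject *shared_set;

    static void *
    worker(void *arg)
    {
        (void)arg;
        PyGILState_STATE gstate = PyGILState_Ensure();
        PyObject *key = PyLong_FromLong(3);
        long hits = 0;
        for (long i = 0; i < NLOOPS; i++) {
            hits += PySet_Contains(shared_set, key);  /* 1 if found, 0 if not */
        }
        printf("worker done, %ld hits\n", hits);
        Py_DECREF(key);
        PyGILState_Release(gstate);
        return NULL;
    }

    int
    main(void)
    {
        Py_Initialize();
        shared_set = PySet_New(NULL);
        for (long i = 0; i < 8; i++) {
            PyObject *v = PyLong_FromLong(i);
            PySet_Add(shared_set, v);
            Py_DECREF(v);
        }
        pthread_t threads[NTHREADS];
        /* Detach from the interpreter so the workers can attach and run. */
        Py_BEGIN_ALLOW_THREADS
        for (int i = 0; i < NTHREADS; i++) {
            pthread_create(&threads[i], NULL, worker, NULL);
        }
        for (int i = 0; i < NTHREADS; i++) {
            pthread_join(threads[i], NULL);
        }
        Py_END_ALLOW_THREADS
        Py_DECREF(shared_set);
        Py_FinalizeEx();
        return 0;
    }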

Summary of changes:

* refactor set_lookkey() into set_do_lookup(), which takes a function
  pointer that does the entry comparison. This is similar to do_lookup() in
  dictobject. In an optimized build, the comparison function is inlined, so
  there should be no performance cost.

* change set_do_lookup() to return a status code, with the matching entry
  returned separately through an out parameter

* add set_compare_frozenset() and use it if the object is a frozenset. For
  the free-threaded build, this avoids some overhead (locking, atomic
  operations, incref/decref on the key)

* use FT_ATOMIC_* macros as needed for atomic loads and stores

* use a deferred free for the set table array if the set is shared (only on
  the free-threaded build; the normal build always frees immediately)

* for the free-threaded build, use an explicit for loop to zero the table
  rather than memset()

* when mutating the set, assign so->table to NULL while the change is
  happening, and assign the real table array once the change is done (a
  standalone sketch of the reader-side retry pattern follows this list)
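
As referenced in the last bullet, the reader side can be illustrated outside
CPython. The standalone C sketch below is not part of this commit
(toy_entry, toy_set, and toy_contains_threadsafe are invented names): load
the table pointer with acquire semantics, probe without a lock, then
re-check that the pointer is unchanged before trusting the answer.
set_lookkey_threadsafe() in the diff below additionally retries under the
per-object lock, handles equal-hash collisions and dummy slots, and increfs
the candidate key around the rich comparison.

    /* toy_lookup.c - sketch of the optimistic lock-free lookup pattern. */
    #include <stdatomic.h>
    #include <stdio.h>

    typedef struct {
        _Atomic long key;              /* 0 marks an empty slot in this toy */
    } toy_entry;

    typedef struct {
        _Atomic(toy_entry *) table;    /* writers swap this atomically */
        size_t mask;                   /* table size - 1; fixed in this toy */
    } toy_set;

    /* Returns 1 if found, 0 if absent, -1 if the table was swapped while
     * probing and the caller must retry (CPython retries under the lock). */
    static int
    toy_contains_threadsafe(toy_set *s, long key)
    {
        toy_entry *table = atomic_load_explicit(&s->table, memory_order_acquire);
        size_t mask = s->mask;
        int result = 0;
        for (size_t n = 0, i = (size_t)key & mask; n <= mask; n++) {
            long k = atomic_load_explicit(&table[(i + n) & mask].key,
                                          memory_order_acquire);
            if (k == 0) {              /* hit an empty slot: key is absent */
                break;
            }
            if (k == key) {
                result = 1;
                break;
            }
        }
        /* The probe is only trustworthy if the table was not replaced. */
        if (table != atomic_load_explicit(&s->table, memory_order_acquire)) {
            return -1;
        }
        return result;
    }

    int
    main(void)
    {
        static toy_entry slots[8];     /* zero-initialized: all slots empty */
        toy_set s;
        atomic_init(&s.table, slots);
        s.mask = 7;
        atomic_store_explicit(&slots[3 & 7].key, 3, memory_order_release);
        printf("%d %d\n", toy_contains_threadsafe(&s, 3),
                          toy_contains_threadsafe(&s, 5));   /* prints: 1 0 */
        return 0;
    }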

Include/internal/pycore_pyatomic_ft_wrappers.h
Lib/test/test_free_threading/test_set.py
Misc/NEWS.d/next/Core_and_Builtins/2025-07-11-19-57-27.gh-issue-132657.vwDuO2.rst [new file with mode: 0644]
Objects/clinic/setobject.c.h
Objects/setobject.c

diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h
index 817c0763bf899bcf6a7c4fb8f5ede6ac1ddfbe8c..1a6d5075361f3c173a83828ae1522549c08e9e07 100644 (file)
@@ -57,6 +57,8 @@ extern "C" {
     _Py_atomic_store_uintptr_release(&value, new_value)
 #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \
     _Py_atomic_store_ssize_relaxed(&value, new_value)
+#define FT_ATOMIC_STORE_SSIZE_RELEASE(value, new_value) \
+    _Py_atomic_store_ssize_release(&value, new_value)
 #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \
     _Py_atomic_store_uint8_relaxed(&value, new_value)
 #define FT_ATOMIC_STORE_UINT16_RELAXED(value, new_value) \
@@ -140,6 +142,7 @@ extern "C" {
 #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value
 #define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value
 #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value
+#define FT_ATOMIC_STORE_SSIZE_RELEASE(value, new_value) value = new_value
 #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value
 #define FT_ATOMIC_STORE_UINT16_RELAXED(value, new_value) value = new_value
 #define FT_ATOMIC_STORE_UINT32_RELAXED(value, new_value) value = new_value
diff --git a/Lib/test/test_free_threading/test_set.py b/Lib/test/test_free_threading/test_set.py
index a66e03bcc4c9d12b0711e51993b1da85ac7c7be8..251020319b20f3a3edc3c723864ee3e462fc9b4a 100644 (file)
@@ -1,18 +1,14 @@
 import unittest
-
 from threading import Thread, Barrier
-from unittest import TestCase
-
 from test.support import threading_helper
 
 
-@threading_helper.requires_working_threading()
-class TestSet(TestCase):
+class TestSetRepr(unittest.TestCase):
     def test_repr_clear(self):
         """Test repr() of a set while another thread is calling clear()"""
         NUM_ITERS = 10
         NUM_REPR_THREADS = 10
-        barrier = Barrier(NUM_REPR_THREADS + 1)
+        barrier = Barrier(NUM_REPR_THREADS + 1, timeout=2)
         s = {1, 2, 3, 4, 5, 6, 7, 8}
 
         def clear_set():
@@ -37,5 +33,133 @@ class TestSet(TestCase):
                 self.assertIn(set_repr, ("set()", "{1, 2, 3, 4, 5, 6, 7, 8}"))
 
 
+class RaceTestBase:
+    def test_contains_mutate(self):
+        """Test set contains operation combined with mutation."""
+        barrier = Barrier(2, timeout=2)
+        s = set()
+        done = False
+
+        NUM_LOOPS = 1000
+
+        def read_set():
+            barrier.wait()
+            while not done:
+                for i in range(self.SET_SIZE):
+                    item = i >> 1
+                    result = item in s
+
+        def mutate_set():
+            nonlocal done
+            barrier.wait()
+            for i in range(NUM_LOOPS):
+                s.clear()
+                for j in range(self.SET_SIZE):
+                    s.add(j)
+                for j in range(self.SET_SIZE):
+                    s.discard(j)
+                # executes the set_swap_bodies() function
+                s.__iand__(set(k for k in range(10, 20)))
+            done = True
+
+        threads = [Thread(target=read_set), Thread(target=mutate_set)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+    def test_contains_frozenset(self):
+        barrier = Barrier(3, timeout=2)
+        done = False
+
+        NUM_LOOPS = 1_000
+
+        # This mutates the key used for the contains test, not the container
+        # itself.  This works because frozenset allows the key to be a set().
+        s = set()
+
+        def mutate_set():
+            barrier.wait()
+            while not done:
+                s.add(0)
+                s.add(1)
+                s.clear()
+
+        def read_set():
+            nonlocal done
+            barrier.wait()
+            container = frozenset([frozenset([0])])
+            self.assertTrue(set([0]) in container)
+            for _ in range(NUM_LOOPS):
+                # Will return True when {0} is the key and False otherwise
+                result = s in container
+            done = True
+
+        threads = [
+            Thread(target=read_set),
+            Thread(target=read_set),
+            Thread(target=mutate_set),
+        ]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+    def test_contains_hash_mutate(self):
+        """Test set contains operation with mutating hash method."""
+        barrier = Barrier(2, timeout=2)
+
+        NUM_LOOPS = 1_000
+        SET_SIZE = self.SET_SIZE
+
+        s = set(range(SET_SIZE))
+
+        class Key:
+            def __init__(self):
+                self.count = 0
+                self.value = 0
+
+            def __hash__(self):
+                self.count += 1
+                # This is intended to trigger the SET_LOOKKEY_CHANGED case
+                # of set_lookkey_threadsafe() since calling clear()
+                # will cause the 'table' pointer to change.
+                if self.count % 2 == 0:
+                    s.clear()
+                else:
+                    s.update(range(SET_SIZE))
+                return hash(self.value)
+
+            def __eq__(self, other):
+                return self.value == other
+
+        key = Key()
+        self.assertTrue(key in s)
+        self.assertFalse(key in s)
+        self.assertTrue(key in s)
+        self.assertFalse(key in s)
+
+        def read_set():
+            barrier.wait()
+            for i in range(NUM_LOOPS):
+                result = key in s
+
+        threads = [Thread(target=read_set), Thread(target=read_set)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+
+@threading_helper.requires_working_threading()
+class SmallSetTest(RaceTestBase, unittest.TestCase):
+    SET_SIZE = 6  # smaller than PySet_MINSIZE
+
+
+@threading_helper.requires_working_threading()
+class LargeSetTest(RaceTestBase, unittest.TestCase):
+    SET_SIZE = 20  # larger than PySet_MINSIZE
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-11-19-57-27.gh-issue-132657.vwDuO2.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-11-19-57-27.gh-issue-132657.vwDuO2.rst
new file mode 100644 (file)
index 0000000..2709932
--- /dev/null
@@ -0,0 +1,2 @@
+For the free-threaded build, avoid locking the :class:`set` object for the
+``__contains__`` method.
diff --git a/Objects/clinic/setobject.c.h b/Objects/clinic/setobject.c.h
index 488be4435f81d74497e02d617b37b226372ac3bd..098c4bcb53d3fb1508995a97879568c70107ae8f 100644 (file)
@@ -425,9 +425,7 @@ set___contains__(PyObject *so, PyObject *key)
 {
     PyObject *return_value = NULL;
 
-    Py_BEGIN_CRITICAL_SECTION(so);
     return_value = set___contains___impl((PySetObject *)so, key);
-    Py_END_CRITICAL_SECTION();
 
     return return_value;
 }
@@ -554,4 +552,4 @@ set___sizeof__(PyObject *so, PyObject *Py_UNUSED(ignored))
 
     return return_value;
 }
-/*[clinic end generated code: output=7f7fe845ca165078 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=5800c0bf136a5a0a input=a9049054013a1b77]*/
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 85f4d7d403178adbeb53bace11b41861eeb9f7f3..2c585da1c43a78195232c4ef33d387ad034c9adc 100644 (file)
@@ -63,6 +63,148 @@ static PyObject _dummy_struct;
 
 #define dummy (&_dummy_struct)
 
+#define SET_LOOKKEY_FOUND 1
+#define SET_LOOKKEY_NO_MATCH 0
+#define SET_LOOKKEY_ERROR (-1)
+#define SET_LOOKKEY_CHANGED (-2)
+#define SET_LOOKKEY_EMPTY (-3)
+
+typedef int (*compare_func)(PySetObject *so, setentry *table, setentry *ep,
+                            PyObject *key, Py_hash_t hash);
+
+#ifdef Py_GIL_DISABLED
+
+#define SET_IS_SHARED(so) _PyObject_GC_IS_SHARED(so)
+#define SET_MARK_SHARED(so) _PyObject_GC_SET_SHARED(so)
+
+static void
+ensure_shared_on_read(PySetObject *so)
+{
+    if (!_Py_IsOwnedByCurrentThread((PyObject *)so) && !SET_IS_SHARED(so)) {
+        // The first time we access a set from a non-owning thread we mark it
+        // as shared. This ensures that a concurrent resize operation will
+        // delay freeing the old entries using QSBR, which is necessary
+        // to safely allow concurrent reads without locking.
+        Py_BEGIN_CRITICAL_SECTION(so);
+        if (!SET_IS_SHARED(so)) {
+            SET_MARK_SHARED(so);
+        }
+        Py_END_CRITICAL_SECTION();
+    }
+}
+
+static inline Py_ALWAYS_INLINE int
+set_compare_threadsafe(PySetObject *so, setentry *table, setentry *ep,
+                       PyObject *key, Py_hash_t hash)
+{
+    PyObject *startkey = FT_ATOMIC_LOAD_PTR_ACQUIRE(ep->key);
+    if (startkey == NULL) {
+        return SET_LOOKKEY_EMPTY;
+    }
+    if (startkey == key) {
+        return SET_LOOKKEY_FOUND;
+    }
+    Py_ssize_t ep_hash = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(ep->hash);
+    if (ep_hash == hash) {
+        if (!_Py_TryIncrefCompare(&ep->key, startkey)) {
+            return SET_LOOKKEY_CHANGED;
+        }
+        int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+        Py_DECREF(startkey);
+        if (cmp < 0) {
+            return SET_LOOKKEY_ERROR;
+        }
+        if (table == FT_ATOMIC_LOAD_PTR_ACQUIRE(so->table) &&
+            startkey == FT_ATOMIC_LOAD_PTR_ACQUIRE(ep->key)) {
+            assert(cmp == SET_LOOKKEY_FOUND || cmp == SET_LOOKKEY_NO_MATCH);
+            return cmp;
+        }
+        else {
+            /* The set was mutated, restart */
+            return SET_LOOKKEY_CHANGED;
+        }
+    }
+    return SET_LOOKKEY_NO_MATCH;
+}
+
+#else
+
+#define SET_IS_SHARED(so) 0
+#define SET_MARK_SHARED(so)
+
+#endif
+
+static inline Py_ALWAYS_INLINE int
+set_compare_entry_lock_held(PySetObject *so, setentry *table, setentry *entry,
+                            PyObject *key, Py_hash_t hash)
+{
+    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(so);
+    if (entry->hash == 0 && entry->key == NULL)
+        return SET_LOOKKEY_EMPTY;
+    if (entry->hash == hash) {
+        PyObject *startkey = entry->key;
+        assert(startkey != dummy);
+        if (startkey == key)
+            return SET_LOOKKEY_FOUND;
+        if (PyUnicode_CheckExact(startkey)
+            && PyUnicode_CheckExact(key)
+            && unicode_eq(startkey, key))
+            return SET_LOOKKEY_FOUND;
+        table = so->table;
+        Py_INCREF(startkey);
+        int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+        Py_DECREF(startkey);
+        if (cmp < 0)
+            return SET_LOOKKEY_ERROR;
+        if (table != so->table || entry->key != startkey)
+            return SET_LOOKKEY_CHANGED;
+        if (cmp > 0)
+            return SET_LOOKKEY_FOUND;
+    }
+    return SET_LOOKKEY_NO_MATCH;
+}
+
+// This is similar to set_compare_entry_lock_held() but we don't need to
+// incref startkey before comparing and we don't need to check if the set has
+// changed.  This also omits the PyUnicode_CheckExact() special case since it
+// doesn't help much for frozensets.
+static inline Py_ALWAYS_INLINE int
+set_compare_frozenset(PySetObject *so, setentry *table, setentry *ep,
+                      PyObject *key, Py_hash_t hash)
+{
+    assert(PyFrozenSet_Check(so));
+    PyObject *startkey = ep->key;
+    if (startkey == NULL) {
+        return SET_LOOKKEY_EMPTY;
+    }
+    if (startkey == key) {
+        return SET_LOOKKEY_FOUND;
+    }
+    Py_ssize_t ep_hash = ep->hash;
+    if (ep_hash == hash) {
+        int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+        if (cmp < 0) {
+            return SET_LOOKKEY_ERROR;
+        }
+        assert(cmp == SET_LOOKKEY_FOUND || cmp == SET_LOOKKEY_NO_MATCH);
+        return cmp;
+    }
+    return SET_LOOKKEY_NO_MATCH;
+}
+
+static void
+set_zero_table(setentry *table, size_t size)
+{
+#ifdef Py_GIL_DISABLED
+    for (size_t i = 0; i < size; i++) {
+        setentry *entry = &table[i];
+        FT_ATOMIC_STORE_SSIZE_RELAXED(entry->hash, 0);
+        FT_ATOMIC_STORE_PTR_RELEASE(entry->key, NULL);
+    }
+#else
+    memset(table, 0, sizeof(setentry)*size);
+#endif
+}
 
 /* ======================================================================== */
 /* ======= Begin logic for probing the hash table ========================= */
@@ -75,58 +217,34 @@ static PyObject _dummy_struct;
 /* This must be >= 1 */
 #define PERTURB_SHIFT 5
 
-static setentry *
-set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
+static int
+set_do_lookup(PySetObject *so, setentry *table, size_t mask, PyObject *key,
+              Py_hash_t hash, setentry **epp, compare_func compare_entry)
 {
-    setentry *table;
     setentry *entry;
     size_t perturb = hash;
-    size_t mask = so->mask;
     size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
     int probes;
-    int cmp;
-
-    int frozenset = PyFrozenSet_CheckExact(so);
+    int status;
 
     while (1) {
-        entry = &so->table[i];
+        entry = &table[i];
         probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
         do {
-            if (entry->hash == 0 && entry->key == NULL)
-                return entry;
-            if (entry->hash == hash) {
-                PyObject *startkey = entry->key;
-                assert(startkey != dummy);
-                if (startkey == key)
-                    return entry;
-                if (PyUnicode_CheckExact(startkey)
-                    && PyUnicode_CheckExact(key)
-                    && unicode_eq(startkey, key))
-                    return entry;
-                table = so->table;
-                if (frozenset) {
-                    cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
-                    if (cmp < 0)
-                        return NULL;
-                } else {
-                    // incref startkey because it can be removed from the set by the compare
-                    Py_INCREF(startkey);
-                    cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
-                    Py_DECREF(startkey);
-                    if (cmp < 0)
-                        return NULL;
-                    if (table != so->table || entry->key != startkey)
-                        return set_lookkey(so, key, hash);
+            status = compare_entry(so, table, entry, key, hash);
+            if (status != SET_LOOKKEY_NO_MATCH) {
+                if (status == SET_LOOKKEY_EMPTY) {
+                    return SET_LOOKKEY_NO_MATCH;
                 }
-                if (cmp > 0)
-                    return entry;
-                mask = so->mask;
+                *epp = entry;
+                return status;
             }
             entry++;
         } while (probes--);
         perturb >>= PERTURB_SHIFT;
         i = (i * 5 + 1 + perturb) & mask;
     }
+    Py_UNREACHABLE();
 }
 
 static int set_table_resize(PySetObject *, Py_ssize_t);
@@ -191,15 +309,15 @@ set_add_entry_takeref(PySetObject *so, PyObject *key, Py_hash_t hash)
     if (freeslot == NULL)
         goto found_unused;
     FT_ATOMIC_STORE_SSIZE_RELAXED(so->used, so->used + 1);
-    freeslot->key = key;
-    freeslot->hash = hash;
+    FT_ATOMIC_STORE_SSIZE_RELAXED(freeslot->hash, hash);
+    FT_ATOMIC_STORE_PTR_RELEASE(freeslot->key, key);
     return 0;
 
   found_unused:
     so->fill++;
     FT_ATOMIC_STORE_SSIZE_RELAXED(so->used, so->used + 1);
-    entry->key = key;
-    entry->hash = hash;
+    FT_ATOMIC_STORE_SSIZE_RELAXED(entry->hash, hash);
+    FT_ATOMIC_STORE_PTR_RELEASE(entry->key, key);
     if ((size_t)so->fill*5 < mask*3)
         return 0;
     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
@@ -282,13 +400,78 @@ set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
         i = (i * 5 + 1 + perturb) & mask;
     }
   found_null:
-    entry->key = key;
-    entry->hash = hash;
+    FT_ATOMIC_STORE_SSIZE_RELAXED(entry->hash, hash);
+    FT_ATOMIC_STORE_PTR_RELEASE(entry->key, key);
 }
 
 /* ======== End logic for probing the hash table ========================== */
 /* ======================================================================== */
 
+static int
+set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash, setentry **epp)
+{
+    int status;
+    if (PyFrozenSet_CheckExact(so)) {
+        status = set_do_lookup(so, so->table, so->mask, key, hash, epp,
+                               set_compare_frozenset);
+    }
+    else {
+        Py_BEGIN_CRITICAL_SECTION(so);
+        do {
+            status = set_do_lookup(so, so->table, so->mask, key, hash, epp,
+                                   set_compare_entry_lock_held);
+        } while (status == SET_LOOKKEY_CHANGED);
+        Py_END_CRITICAL_SECTION();
+    }
+    assert(status == SET_LOOKKEY_FOUND ||
+           status == SET_LOOKKEY_NO_MATCH ||
+           status == SET_LOOKKEY_ERROR);
+    return status;
+}
+
+#ifdef Py_GIL_DISABLED
+static int
+set_lookkey_threadsafe(PySetObject *so, PyObject *key, Py_hash_t hash)
+{
+    int status;
+    setentry *entry;
+    if (PyFrozenSet_CheckExact(so)) {
+        status = set_do_lookup(so, so->table, so->mask, key, hash, &entry,
+                               set_compare_frozenset);
+        assert(status == SET_LOOKKEY_FOUND ||
+               status == SET_LOOKKEY_NO_MATCH ||
+               status == SET_LOOKKEY_ERROR);
+        return status;
+    }
+    ensure_shared_on_read(so);
+    setentry *table = FT_ATOMIC_LOAD_PTR_ACQUIRE(so->table);
+    size_t mask = FT_ATOMIC_LOAD_SSIZE_RELAXED(so->mask);
+    if (table == NULL || table != FT_ATOMIC_LOAD_PTR_ACQUIRE(so->table)) {
+        return set_lookkey(so, key, hash, &entry);
+    }
+    status = set_do_lookup(so, table, mask, key, hash, &entry,
+                           set_compare_threadsafe);
+    if (status == SET_LOOKKEY_CHANGED) {
+        return set_lookkey(so, key, hash, &entry);
+    }
+    assert(status == SET_LOOKKEY_FOUND ||
+           status == SET_LOOKKEY_NO_MATCH ||
+           status == SET_LOOKKEY_ERROR);
+    return status;
+}
+#endif
+
+static void free_entries(setentry *entries, size_t size, bool use_qsbr)
+{
+#ifdef Py_GIL_DISABLED
+    if (use_qsbr) {
+        _PyMem_FreeDelayed(entries, size * sizeof(setentry));
+        return;
+    }
+#endif
+    PyMem_Free(entries);
+}
+
 /*
 Restructure the table by allocating a new table and reinserting all
 keys again.  When entries have been deleted, the new table may
@@ -299,6 +482,7 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
 {
     setentry *oldtable, *newtable, *entry;
     Py_ssize_t oldmask = so->mask;
+    Py_ssize_t oldsize = (size_t)oldmask + 1;
     size_t newmask;
     int is_oldtable_malloced;
     setentry small_copy[PySet_MINSIZE];
@@ -346,9 +530,9 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
 
     /* Make the set empty, using the new table. */
     assert(newtable != oldtable);
-    memset(newtable, 0, sizeof(setentry) * newsize);
-    so->mask = newsize - 1;
-    so->table = newtable;
+    set_zero_table(newtable, newsize);
+    FT_ATOMIC_STORE_PTR_RELEASE(so->table, NULL);
+    FT_ATOMIC_STORE_SSIZE_RELAXED(so->mask, newsize - 1);
 
     /* Copy the data over; this is refcount-neutral for active entries;
        dummy entries aren't copied over, of course */
@@ -368,20 +552,22 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
         }
     }
 
+    FT_ATOMIC_STORE_PTR_RELEASE(so->table, newtable);
+
     if (is_oldtable_malloced)
-        PyMem_Free(oldtable);
+        free_entries(oldtable, oldsize, SET_IS_SHARED(so));
     return 0;
 }
 
 static int
 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
 {
-    setentry *entry;
-
-    entry = set_lookkey(so, key, hash);
-    if (entry != NULL)
-        return entry->key != NULL;
-    return -1;
+#ifdef Py_GIL_DISABLED
+    return set_lookkey_threadsafe(so, key, hash);
+#else
+    setentry *entry; // unused
+    return set_lookkey(so, key, hash, &entry);
+#endif
 }
 
 #define DISCARD_NOTFOUND 0
@@ -392,16 +578,18 @@ set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
 {
     setentry *entry;
     PyObject *old_key;
-
-    entry = set_lookkey(so, key, hash);
-    if (entry == NULL)
+    int status = set_lookkey(so, key, hash, &entry);
+    if (status < 0) {
         return -1;
-    if (entry->key == NULL)
+    }
+    if (status == SET_LOOKKEY_NO_MATCH) {
         return DISCARD_NOTFOUND;
+    }
+    assert(status == SET_LOOKKEY_FOUND);
     old_key = entry->key;
-    entry->key = dummy;
-    entry->hash = -1;
+    FT_ATOMIC_STORE_SSIZE_RELAXED(entry->hash, -1);
     FT_ATOMIC_STORE_SSIZE_RELAXED(so->used, so->used - 1);
+    FT_ATOMIC_STORE_PTR_RELEASE(entry->key, dummy);
     Py_DECREF(old_key);
     return DISCARD_FOUND;
 }
@@ -442,12 +630,13 @@ set_discard_key(PySetObject *so, PyObject *key)
 static void
 set_empty_to_minsize(PySetObject *so)
 {
-    memset(so->smalltable, 0, sizeof(so->smalltable));
+    FT_ATOMIC_STORE_PTR_RELEASE(so->table, NULL);
+    set_zero_table(so->smalltable, PySet_MINSIZE);
     so->fill = 0;
     FT_ATOMIC_STORE_SSIZE_RELAXED(so->used, 0);
-    so->mask = PySet_MINSIZE - 1;
-    so->table = so->smalltable;
+    FT_ATOMIC_STORE_SSIZE_RELAXED(so->mask, PySet_MINSIZE - 1);
     FT_ATOMIC_STORE_SSIZE_RELAXED(so->hash, -1);
+    FT_ATOMIC_STORE_PTR_RELEASE(so->table, so->smalltable);
 }
 
 static int
@@ -458,6 +647,7 @@ set_clear_internal(PyObject *self)
     setentry *table = so->table;
     Py_ssize_t fill = so->fill;
     Py_ssize_t used = so->used;
+    Py_ssize_t oldsize = (size_t)so->mask + 1;
     int table_is_malloced = table != so->smalltable;
     setentry small_copy[PySet_MINSIZE];
 
@@ -496,7 +686,7 @@ set_clear_internal(PyObject *self)
     }
 
     if (table_is_malloced)
-        PyMem_Free(table);
+        free_entries(table, oldsize, SET_IS_SHARED(so));
     return 0;
 }
 
@@ -543,6 +733,7 @@ set_dealloc(PyObject *self)
     PySetObject *so = _PySet_CAST(self);
     setentry *entry;
     Py_ssize_t used = so->used;
+    Py_ssize_t oldsize = (size_t)so->mask + 1;
 
     /* bpo-31095: UnTrack is needed before calling any callbacks */
     PyObject_GC_UnTrack(so);
@@ -555,7 +746,7 @@ set_dealloc(PyObject *self)
         }
     }
     if (so->table != so->smalltable)
-        PyMem_Free(so->table);
+        free_entries(so->table, oldsize, SET_IS_SHARED(so));
     Py_TYPE(so)->tp_free(so);
 }
 
@@ -666,8 +857,8 @@ set_merge_lock_held(PySetObject *so, PyObject *otherset)
             key = other_entry->key;
             if (key != NULL) {
                 assert(so_entry->key == NULL);
-                so_entry->key = Py_NewRef(key);
-                so_entry->hash = other_entry->hash;
+                FT_ATOMIC_STORE_SSIZE_RELAXED(so_entry->hash, other_entry->hash);
+                FT_ATOMIC_STORE_PTR_RELEASE(so_entry->key, Py_NewRef(key));
             }
         }
         so->fill = other->fill;
@@ -731,10 +922,10 @@ set_pop_impl(PySetObject *so)
         if (entry > limit)
             entry = so->table;
     }
-    key = entry->key;
-    entry->key = dummy;
-    entry->hash = -1;
+    FT_ATOMIC_STORE_SSIZE_RELAXED(entry->hash, -1);
     FT_ATOMIC_STORE_SSIZE_RELAXED(so->used, so->used - 1);
+    key = entry->key;
+    FT_ATOMIC_STORE_PTR_RELEASE(entry->key, dummy);
     so->finger = entry - so->table + 1;   /* next place to start */
     return key;
 }
@@ -821,11 +1012,11 @@ frozenset_hash(PyObject *self)
     Py_uhash_t hash;
 
     if (FT_ATOMIC_LOAD_SSIZE_RELAXED(so->hash) != -1) {
-        return FT_ATOMIC_LOAD_SSIZE_RELAXED(so->hash);
+        return FT_ATOMIC_LOAD_SSIZE_ACQUIRE(so->hash);
     }
 
     hash = frozenset_hash_impl(self);
-    FT_ATOMIC_STORE_SSIZE_RELAXED(so->hash, hash);
+    FT_ATOMIC_STORE_SSIZE_RELEASE(so->hash, hash);
     return hash;
 }
 
@@ -907,8 +1098,8 @@ static PyObject *setiter_iternext(PyObject *self)
         return NULL;
     assert (PyAnySet_Check(so));
 
-    Py_ssize_t so_used = FT_ATOMIC_LOAD_SSIZE(so->used);
-    Py_ssize_t si_used = FT_ATOMIC_LOAD_SSIZE(si->si_used);
+    Py_ssize_t so_used = FT_ATOMIC_LOAD_SSIZE_RELAXED(so->used);
+    Py_ssize_t si_used = FT_ATOMIC_LOAD_SSIZE_RELAXED(si->si_used);
     if (si_used != so_used) {
         PyErr_SetString(PyExc_RuntimeError,
                         "Set changed size during iteration");
@@ -1248,21 +1439,28 @@ set_swap_bodies(PySetObject *a, PySetObject *b)
     setentry tab[PySet_MINSIZE];
     Py_hash_t h;
 
+    setentry *a_table = a->table;
+    setentry *b_table = b->table;
+    FT_ATOMIC_STORE_PTR_RELEASE(a->table, NULL);
+    FT_ATOMIC_STORE_PTR_RELEASE(b->table, NULL);
+
     t = a->fill;     a->fill   = b->fill;        b->fill  = t;
     t = a->used;
     FT_ATOMIC_STORE_SSIZE_RELAXED(a->used, b->used);
     FT_ATOMIC_STORE_SSIZE_RELAXED(b->used, t);
-    t = a->mask;     a->mask   = b->mask;        b->mask  = t;
+    t = a->mask;
+    FT_ATOMIC_STORE_SSIZE_RELAXED(a->mask, b->mask);
+    FT_ATOMIC_STORE_SSIZE_RELAXED(b->mask, t);
 
-    u = a->table;
-    if (a->table == a->smalltable)
+    u = a_table;
+    if (a_table == a->smalltable)
         u = b->smalltable;
-    a->table  = b->table;
-    if (b->table == b->smalltable)
-        a->table = a->smalltable;
-    b->table = u;
+    a_table  = b_table;
+    if (b_table == b->smalltable)
+        a_table = a->smalltable;
+    b_table = u;
 
-    if (a->table == a->smalltable || b->table == b->smalltable) {
+    if (a_table == a->smalltable || b_table == b->smalltable) {
         memcpy(tab, a->smalltable, sizeof(tab));
         memcpy(a->smalltable, b->smalltable, sizeof(tab));
         memcpy(b->smalltable, tab, sizeof(tab));
@@ -1277,6 +1475,14 @@ set_swap_bodies(PySetObject *a, PySetObject *b)
         FT_ATOMIC_STORE_SSIZE_RELAXED(a->hash, -1);
         FT_ATOMIC_STORE_SSIZE_RELAXED(b->hash, -1);
     }
+    if (!SET_IS_SHARED(b) && SET_IS_SHARED(a)) {
+        SET_MARK_SHARED(b);
+    }
+    if (!SET_IS_SHARED(a) && SET_IS_SHARED(b)) {
+        SET_MARK_SHARED(a);
+    }
+    FT_ATOMIC_STORE_PTR_RELEASE(a->table, a_table);
+    FT_ATOMIC_STORE_PTR_RELEASE(b->table, b_table);
 }
 
 /*[clinic input]
@@ -2222,39 +2428,26 @@ set_add_impl(PySetObject *so, PyObject *key)
     Py_RETURN_NONE;
 }
 
-static int
-set_contains_lock_held(PySetObject *so, PyObject *key)
+int
+_PySet_Contains(PySetObject *so, PyObject *key)
 {
-    int rv;
+    assert(so);
 
-    rv = set_contains_key(so, key);
-    if (rv < 0) {
-        if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
+    Py_hash_t hash = _PyObject_HashFast(key);
+    if (hash == -1) {
+        if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) {
+            set_unhashable_type(key);
             return -1;
+        }
         PyErr_Clear();
-        Py_hash_t hash;
+        // Note that 'key' could be a set() or frozenset() object.  Unlike most
+        // container types, set allows membership testing with a set key, even
+        // though it is not hashable.
         Py_BEGIN_CRITICAL_SECTION(key);
         hash = frozenset_hash_impl(key);
         Py_END_CRITICAL_SECTION();
-        rv = set_contains_entry(so, key, hash);
-    }
-    return rv;
-}
-
-int
-_PySet_Contains(PySetObject *so, PyObject *key)
-{
-    assert(so);
-
-    int rv;
-    if (PyFrozenSet_CheckExact(so)) {
-        rv = set_contains_lock_held(so, key);
-    } else {
-        Py_BEGIN_CRITICAL_SECTION(so);
-        rv = set_contains_lock_held(so, key);
-        Py_END_CRITICAL_SECTION();
     }
-    return rv;
+    return set_contains_entry(so, key, hash);
 }
 
 static int
@@ -2265,7 +2458,6 @@ set_contains(PyObject *self, PyObject *key)
 }
 
 /*[clinic input]
-@critical_section
 @coexist
 set.__contains__
     so: setobject
@@ -2277,11 +2469,11 @@ x.__contains__(y) <==> y in x.
 
 static PyObject *
 set___contains___impl(PySetObject *so, PyObject *key)
-/*[clinic end generated code: output=b44863d034b3c70e input=4a7d568459617f24]*/
+/*[clinic end generated code: output=b44863d034b3c70e input=cf4c72db704e4cf0]*/
 {
     long result;
 
-    result = set_contains_lock_held(so, key);
+    result = _PySet_Contains(so, key);
     if (result < 0)
         return NULL;
     return PyBool_FromLong(result);
@@ -2301,12 +2493,23 @@ static PyObject *
 frozenset___contains___impl(PySetObject *so, PyObject *key)
 /*[clinic end generated code: output=2301ed91bc3a6dd5 input=2f04922a98d8bab7]*/
 {
-    long result;
-
-    result = set_contains_lock_held(so, key);
-    if (result < 0)
+    Py_hash_t hash = _PyObject_HashFast(key);
+    if (hash == -1) {
+        if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) {
+            set_unhashable_type(key);
+            return NULL;
+        }
+        PyErr_Clear();
+        Py_BEGIN_CRITICAL_SECTION(key);
+        hash = frozenset_hash_impl(key);
+        Py_END_CRITICAL_SECTION();
+    }
+    setentry *entry; // unused
+    int status = set_do_lookup(so, so->table, so->mask, key, hash, &entry,
+                               set_compare_frozenset);
+    if (status < 0)
         return NULL;
-    return PyBool_FromLong(result);
+    return PyBool_FromLong(status);
 }
 
 /*[clinic input]