]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-113956: Make intern_common thread-safe in free-threaded build (gh-148886)
authorSam Gross <colesbury@gmail.com>
Thu, 23 Apr 2026 18:42:57 +0000 (14:42 -0400)
committerGitHub <noreply@github.com>
Thu, 23 Apr 2026 18:42:57 +0000 (14:42 -0400)
Avoid racing with the owning thread's refcount operations when
immortalizing an interned string: if we don't own it and its refcount
isn't merged, intern a copy we own instead. Use atomic stores in
_Py_SetImmortalUntracked so concurrent atomic reads are race-free.

Lib/test/test_free_threading/test_str.py
Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst [new file with mode: 0644]
Objects/object.c
Objects/unicodeobject.c

index 9a1ce3620ac4b2e3096dc012996ba0080cc11902..11e04009956db1d2a4d5bb44df51708f27f46993 100644 (file)
@@ -1,7 +1,9 @@
+import sys
+import threading
 import unittest
 
 from itertools import cycle
-from threading import Event, Thread
+from threading import Barrier, Event, Thread
 from unittest import TestCase
 
 from test.support import threading_helper
@@ -69,6 +71,24 @@ class TestStr(TestCase):
         for reader in readers:
             reader.join()
 
+    def test_intern_unowned_string(self):
+        # Test interning strings owned by various threads.
+        strings = [f"intern_race_owner_{i}" for i in range(50)]
+
+        NUM_THREADS = 5
+        b = Barrier(NUM_THREADS)
+
+        def interner():
+            tid = threading.get_ident()
+            for i in range(20):
+                strings.append(f"intern_{tid}_{i}")
+            b.wait()
+            for s in strings:
+                r = sys.intern(s)
+                self.assertTrue(sys._is_interned(r))
+
+        threading_helper.run_concurrently(interner, nthreads=NUM_THREADS)
+
     def test_maketrans_dict_concurrent_modification(self):
         for _ in range(5):
             d = {2000: 'a'}
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-22-14-55-18.gh-issue-113956.0VEXd6.rst
new file mode 100644 (file)
index 0000000..54c04bb
--- /dev/null
@@ -0,0 +1,4 @@
+Fix a data race in :func:`sys.intern` in the free-threaded build when
+interning a string owned by another thread. An interned copy owned by the
+current thread is used instead when it is not safe to immortalize the
+original.
index 3166254f6f640b486ccbd0990b9ae3d929e57353..4fa20470601eb3db1bc44772898bf68c340e5aff 100644 (file)
@@ -2768,9 +2768,9 @@ _Py_SetImmortalUntracked(PyObject *op)
         return;
     }
 #ifdef Py_GIL_DISABLED
-    op->ob_tid = _Py_UNOWNED_TID;
-    op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
-    op->ob_ref_shared = 0;
+    _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_UNOWNED_TID);
+    _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, _Py_IMMORTAL_REFCNT_LOCAL);
+    _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0);
     _Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED);
 #elif SIZEOF_VOID_P > 4
     op->ob_flags = _Py_IMMORTAL_FLAGS;
index d2569132998accf028312db3ee6ec1188f71e713..9aee7120c811de8076e7856d5c9b251268ab5351 100644 (file)
@@ -589,6 +589,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
 {
 #define CHECK(expr) \
     do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
+#ifdef Py_GIL_DISABLED
+# define CHECK_IF_GIL(expr) (void)(expr)
+# define CHECK_IF_FT(expr) CHECK(expr)
+#else
+# define CHECK_IF_GIL(expr) CHECK(expr)
+# define CHECK_IF_FT(expr) (void)(expr)
+#endif
+
 
     assert(op != NULL);
     CHECK(PyUnicode_Check(op));
@@ -669,11 +677,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
 
     /* Check interning state */
 #ifdef Py_DEBUG
-    // Note that we do not check `_Py_IsImmortal(op)`, since stable ABI
-    // extensions can make immortal strings mortal (but with a high enough
-    // refcount).
-    // The other way is extremely unlikely (worth a potential failed assertion
-    // in a debug build), so we do check `!_Py_IsImmortal(op)`.
+    // Note that we do not check `_Py_IsImmortal(op)` in the GIL-enabled build
+    // since stable ABI extensions can make immortal strings mortal (but with a
+    // high enough refcount).
     switch (PyUnicode_CHECK_INTERNED(op)) {
         case SSTATE_NOT_INTERNED:
             if (ascii->state.statically_allocated) {
@@ -683,18 +689,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
                 //   are static but use SSTATE_NOT_INTERNED
             }
             else {
-                CHECK(!_Py_IsImmortal(op));
+                CHECK_IF_GIL(!_Py_IsImmortal(op));
             }
             break;
         case SSTATE_INTERNED_MORTAL:
             CHECK(!ascii->state.statically_allocated);
-            CHECK(!_Py_IsImmortal(op));
+            CHECK_IF_GIL(!_Py_IsImmortal(op));
             break;
         case SSTATE_INTERNED_IMMORTAL:
             CHECK(!ascii->state.statically_allocated);
+            CHECK_IF_FT(_Py_IsImmortal(op));
             break;
         case SSTATE_INTERNED_IMMORTAL_STATIC:
             CHECK(ascii->state.statically_allocated);
+            CHECK_IF_FT(_Py_IsImmortal(op));
             break;
         default:
             Py_UNREACHABLE();
@@ -14208,6 +14216,18 @@ immortalize_interned(PyObject *s)
     FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
 }
 
+#ifdef Py_GIL_DISABLED
+static bool
+can_immortalize_safely(PyObject *s)
+{
+    if (_Py_IsOwnedByCurrentThread(s) || _Py_IsImmortal(s)) {
+        return true;
+    }
+    Py_ssize_t shared = _Py_atomic_load_ssize(&s->ob_ref_shared);
+    return _Py_REF_IS_MERGED(shared);
+}
+#endif
+
 static /* non-null */ PyObject*
 intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
               bool immortalize)
@@ -14236,11 +14256,16 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
             // no, go on
             break;
         case SSTATE_INTERNED_MORTAL:
+#ifndef Py_GIL_DISABLED
             // yes but we might need to make it immortal
             if (immortalize) {
                 immortalize_interned(s);
             }
             return s;
+#else
+            // not fully interned yet; fall through to the locking path
+            break;
+#endif
         default:
             // all done
             return s;
@@ -14305,6 +14330,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
         Py_DECREF(r);
     }
 #endif
+
+#ifdef Py_GIL_DISABLED
+    // Immortalization writes to the refcount fields non-atomically. That
+    // races with Py_INCREF / Py_DECREF on the thread that owns `s`. If we
+    // don't own it (and its refcount hasn't been merged), intern a copy
+    // we own instead.
+    if (!can_immortalize_safely(s)) {
+        PyObject *copy = _PyUnicode_Copy(s);
+        if (copy == NULL) {
+            PyErr_Clear();
+            return s;
+        }
+        Py_DECREF(s);
+        s = copy;
+    }
+#endif
+
     FT_MUTEX_LOCK(INTERN_MUTEX);
     PyObject *t;
     {