]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.14] gh-146041: Avoid lock in sys.intern() for already interned strings (gh-146072...
authorSam Gross <colesbury@gmail.com>
Wed, 25 Mar 2026 01:42:19 +0000 (21:42 -0400)
committerGitHub <noreply@github.com>
Wed, 25 Mar 2026 01:42:19 +0000 (21:42 -0400)
Fix free-threading scaling bottleneck in sys.intern and `PyObject_SetAttr` by
avoiding the interpreter-wide lock when the string is already interned and
immortalized.

(cherry picked from commit 60093096ba62110151d822b072a01061876e9404)

InternalDocs/string_interning.md
Misc/NEWS.d/next/Core_and_Builtins/2026-03-17-00-00-00.gh-issue-146041.7799bb.rst [new file with mode: 0644]
Objects/object.c
Objects/unicodeobject.c
Tools/ftscalingbench/ftscalingbench.py

index 26a5197c6e70f388dc97b261e6ea1c8ecf819ec4..0913b1a3471ef49fc31c50d43253c3a548b311d1 100644 (file)
@@ -52,15 +52,9 @@ The key and value of each entry in this dict reference the same object.
 
 ## Immortality and reference counting
 
-Invariant: Every immortal string is interned.
+In the GIL-enabled build interned strings may be mortal or immortal. In the
+free-threaded build, interned strings are always immortal.
 
-In practice, this means that you must not use `_Py_SetImmortal` on
-a string. (If you know it's already immortal, don't immortalize it;
-if you know it's not interned you might be immortalizing a redundant copy;
-if it's interned and mortal it needs extra processing in
-`_PyUnicode_InternImmortal`.)
-
-The converse is not true: interned strings can be mortal.
 For mortal interned strings:
 
 - the 2 references from the interned dict (key & value) are excluded from
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-17-00-00-00.gh-issue-146041.7799bb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-17-00-00-00.gh-issue-146041.7799bb.rst
new file mode 100644 (file)
index 0000000..812f023
--- /dev/null
@@ -0,0 +1,3 @@
+Fix free-threading scaling bottleneck in :func:`sys.intern` and
+:c:func:`PyObject_SetAttr` by avoiding the interpreter-wide lock when the string
+is already interned and immortalized.
index e91e0f1e8b7219c0003546714e952f138d450a38..62e45f96bfae39c571a726290dfe1cb69fe84b28 100644 (file)
@@ -1954,7 +1954,7 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name,
     }
 
     Py_INCREF(name);
-    Py_INCREF(tp);
+    _Py_INCREF_TYPE(tp);
 
     PyThreadState *tstate = _PyThreadState_GET();
     _PyCStackRef cref;
@@ -2029,7 +2029,7 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name,
     }
   done:
     _PyThreadState_PopCStackRef(tstate, &cref);
-    Py_DECREF(tp);
+    _Py_DECREF_TYPE(tp);
     Py_DECREF(name);
     return res;
 }
@@ -2678,13 +2678,6 @@ _Py_NewReferenceNoTotal(PyObject *op)
 void
 _Py_SetImmortalUntracked(PyObject *op)
 {
-#ifdef Py_DEBUG
-    // For strings, use _PyUnicode_InternImmortal instead.
-    if (PyUnicode_CheckExact(op)) {
-        assert(PyUnicode_CHECK_INTERNED(op) == SSTATE_INTERNED_IMMORTAL
-            || PyUnicode_CHECK_INTERNED(op) == SSTATE_INTERNED_IMMORTAL_STATIC);
-    }
-#endif
     // Check if already immortal to avoid degrading from static immortal to plain immortal
     if (_Py_IsImmortal(op)) {
         return;
index aef89c15b30a21b8d3e06c37f2286056f1c048e6..1f1a4ab49abf9f185cce78384847fe1629e5a75b 100644 (file)
@@ -15954,8 +15954,11 @@ immortalize_interned(PyObject *s)
         _Py_DecRefTotal(_PyThreadState_GET());
     }
 #endif
-    FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
     _Py_SetImmortal(s);
+    // The switch to SSTATE_INTERNED_IMMORTAL must be the last thing done here
+    // to synchronize with the check in intern_common() that avoids locking if
+    // the string is already immortal.
+    FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
 }
 
 static /* non-null */ PyObject*
@@ -16035,7 +16038,25 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
     /* Do a setdefault on the per-interpreter cache. */
     PyObject *interned = get_interned_dict(interp);
     assert(interned != NULL);
-
+#ifdef Py_GIL_DISABLED
+    // Lock-free fast path: check if there's already an interned copy that
+    // is in its final immortal state.
+    PyObject *r;
+    int res = PyDict_GetItemRef(interned, s, &r);
+    if (res < 0) {
+        PyErr_Clear();
+        return s;
+    }
+    if (res > 0) {
+        unsigned int state = _Py_atomic_load_uint8(&_PyUnicode_STATE(r).interned);
+        if (state == SSTATE_INTERNED_IMMORTAL) {
+            Py_DECREF(s);
+            return r;
+        }
+        // Not yet fully interned; fall through to the locking path.
+        Py_DECREF(r);
+    }
+#endif
     LOCK_INTERNED(interp);
     PyObject *t;
     {
@@ -16072,7 +16093,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
         Py_DECREF(s);
         Py_DECREF(s);
     }
-    FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
+    FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
 
     /* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */
 
index 9c86822418c89265a164b48a1b2b7c424d1756dc..955d06d4c04c4c1708a005131bad3da5cbe802cd 100644 (file)
@@ -239,6 +239,15 @@ def staticmethod_call():
     for _ in range(1000 * WORK_SCALE):
         obj.my_staticmethod()
 
+@register_benchmark
+def setattr_non_interned():
+    prefix = "prefix"
+    obj = MyObject()
+    for _ in range(1000 * WORK_SCALE):
+        setattr(obj, f"{prefix}_a", None)
+        setattr(obj, f"{prefix}_b", None)
+        setattr(obj, f"{prefix}_c", None)
+
 
 def bench_one_thread(func):
     t0 = time.perf_counter_ns()