]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-103323: Get the "Current" Thread State from a Thread-Local Variable (gh-103324)
authorEric Snow <ericsnowcurrently@gmail.com>
Mon, 24 Apr 2023 17:17:02 +0000 (11:17 -0600)
committerGitHub <noreply@github.com>
Mon, 24 Apr 2023 17:17:02 +0000 (11:17 -0600)
We replace _PyRuntime.tstate_current with a thread-local variable. As part of this change, we add a _Py_thread_local macro in pyport.h (only for the core runtime) to smooth out the compiler differences. The main motivation here is in support of a per-interpreter GIL, but this change also provides some performance improvement opportunities.

Note that we do not provide a fallback to the thread-local, either falling back to the old tstate_current or to thread-specific storage (PyThread_tss_*()). If that proves problematic then we can circle back. I consider it unlikely, but will run the buildbots to double-check.

Also note that this does not change any of the code related to the GILState API, where it uses a thread state stored in thread-specific storage. I suspect we can combine that with _Py_tss_tstate (from here). However, that can be addressed separately and is not urgent (nor critical).

(While this change was mostly done independently, I did take some inspiration from earlier (~2020) work by @markshannon (main...markshannon:threadstate_in_tls) and @vstinner (#23976).)

Include/internal/pycore_pystate.h
Include/internal/pycore_runtime.h
Include/pyport.h
Misc/NEWS.d/next/Core and Builtins/2023-04-07-12-18-41.gh-issue-103323.9802br.rst [new file with mode: 0644]
Python/pystate.c

index 6e5f2289cb6b95a6127e3910a34d110626610997..c40f9e7393a16f2c6e623cc52f9af647c2e8d275 100644 (file)
@@ -64,17 +64,14 @@ _Py_ThreadCanHandlePendingCalls(void)
 /* Variable and macro for in-line access to current thread
    and interpreter state */
 
-static inline PyThreadState*
-_PyRuntimeState_GetThreadState(_PyRuntimeState *runtime)
-{
-    return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
-}
+#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
+extern _Py_thread_local PyThreadState *_Py_tss_tstate;
+#endif
+PyAPI_DATA(PyThreadState *) _PyThreadState_GetCurrent(void);
 
 /* Get the current Python thread state.
 
-   Efficient macro reading directly the 'tstate_current' atomic
-   variable. The macro is unsafe: it does not check for error and it can
-   return NULL.
+   This function is unsafe: it does not check for error and it can return NULL.
 
    The caller must hold the GIL.
 
@@ -82,9 +79,20 @@ _PyRuntimeState_GetThreadState(_PyRuntimeState *runtime)
 static inline PyThreadState*
 _PyThreadState_GET(void)
 {
-    return _PyRuntimeState_GetThreadState(&_PyRuntime);
+#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
+    return _Py_tss_tstate;
+#else
+    return _PyThreadState_GetCurrent();
+#endif
+}
+
+static inline PyThreadState*
+_PyRuntimeState_GetThreadState(_PyRuntimeState *Py_UNUSED(runtime))
+{
+    return _PyThreadState_GET();
 }
 
+
 static inline void
 _Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate)
 {
index 3ebe49926edda65df9ed6e702cdbaacf9ed5869f..2a3fd8ab2813ea11ed0e4383e29ca32faced79f3 100644 (file)
@@ -119,9 +119,6 @@ typedef struct pyruntimestate {
 
     unsigned long main_thread;
 
-    /* Assuming the current thread holds the GIL, this is the
-       PyThreadState for the current thread. */
-    _Py_atomic_address tstate_current;
     /* Used for the thread state bound to the current thread. */
     Py_tss_t autoTSSkey;
 
index 5e226f5cb46751446ea4d3b065b6ab15239d6a6a..bd0ba6d0681b21e0cf30b531a688f5ce30d59683 100644 (file)
@@ -662,6 +662,27 @@ extern char * _getpty(int *, int, mode_t, int);
 #  define WITH_THREAD
 #endif
 
+#ifdef WITH_THREAD
+#  ifdef Py_BUILD_CORE
+#    ifdef HAVE_THREAD_LOCAL
+#      error "HAVE_THREAD_LOCAL is already defined"
+#    endif
+#    define HAVE_THREAD_LOCAL 1
+#    ifdef thread_local
+#      define _Py_thread_local thread_local
+#    elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
+#      define _Py_thread_local _Thread_local
+#    elif defined(_MSC_VER)  /* AKA NT_THREADS */
+#      define _Py_thread_local __declspec(thread)
+#    elif defined(__GNUC__)  /* includes clang */
+#      define _Py_thread_local __thread
+#    else
+       // fall back to the PyThread_tss_*() API, or ignore.
+#      undef HAVE_THREAD_LOCAL
+#    endif
+#  endif
+#endif
+
 /* Check that ALT_SOABI is consistent with Py_TRACE_REFS:
    ./configure --with-trace-refs should must be used to define Py_TRACE_REFS */
 #if defined(ALT_SOABI) && defined(Py_TRACE_REFS)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-07-12-18-41.gh-issue-103323.9802br.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-07-12-18-41.gh-issue-103323.9802br.rst
new file mode 100644 (file)
index 0000000..347c91d
--- /dev/null
@@ -0,0 +1,3 @@
+We've replaced our use of ``_PyRuntime.tstate_current`` with a thread-local
+variable.  This is a fairly low-level implementation detail, and there
+should be no change in behavior.
index 1e04887ef04a2caa549ddff2af4e17681aee709c..d108cfc7e50a0a301405f6b8bacaa00b62d2e914 100644 (file)
@@ -60,23 +60,43 @@ extern "C" {
    For each of these functions, the GIL must be held by the current thread.
  */
 
+
+#ifdef HAVE_THREAD_LOCAL
+_Py_thread_local PyThreadState *_Py_tss_tstate = NULL;
+#endif
+
 static inline PyThreadState *
-current_fast_get(_PyRuntimeState *runtime)
+current_fast_get(_PyRuntimeState *Py_UNUSED(runtime))
 {
-    return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
+#ifdef HAVE_THREAD_LOCAL
+    return _Py_tss_tstate;
+#else
+    // XXX Fall back to the PyThread_tss_*() API.
+#  error "no supported thread-local variable storage classifier"
+#endif
 }
 
 static inline void
-current_fast_set(_PyRuntimeState *runtime, PyThreadState *tstate)
+current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate)
 {
     assert(tstate != NULL);
-    _Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)tstate);
+#ifdef HAVE_THREAD_LOCAL
+    _Py_tss_tstate = tstate;
+#else
+    // XXX Fall back to the PyThread_tss_*() API.
+#  error "no supported thread-local variable storage classifier"
+#endif
 }
 
 static inline void
-current_fast_clear(_PyRuntimeState *runtime)
+current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime))
 {
-    _Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)NULL);
+#ifdef HAVE_THREAD_LOCAL
+    _Py_tss_tstate = NULL;
+#else
+    // XXX Fall back to the PyThread_tss_*() API.
+#  error "no supported thread-local variable storage classifier"
+#endif
 }
 
 #define tstate_verify_not_active(tstate) \
@@ -84,6 +104,12 @@ current_fast_clear(_PyRuntimeState *runtime)
         _Py_FatalErrorFormat(__func__, "tstate %p is still current", tstate); \
     }
 
+PyThreadState *
+_PyThreadState_GetCurrent(void)
+{
+    return current_fast_get(&_PyRuntime);
+}
+
 
 //------------------------------------------------
 // the thread state bound to the current OS thread