]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-59956: Clarify GILState-related Code (gh-101161)
authorEric Snow <ericsnowcurrently@gmail.com>
Thu, 19 Jan 2023 23:04:14 +0000 (16:04 -0700)
committerGitHub <noreply@github.com>
Thu, 19 Jan 2023 23:04:14 +0000 (16:04 -0700)
The objective of this change is to help make the GILState-related code easier to understand.  This mostly involves moving code around and some semantically equivalent refactors.  However, there are a also a small number of slight changes in structure and behavior:

* tstate_current is moved out of _PyRuntimeState.gilstate
* autoTSSkey is moved out of _PyRuntimeState.gilstate
* autoTSSkey is initialized earlier
* autoTSSkey is re-initialized (after fork) earlier

https://github.com/python/cpython/issues/59956

Include/cpython/pystate.h
Include/internal/pycore_pylifecycle.h
Include/internal/pycore_pystate.h
Include/internal/pycore_runtime.h
Include/internal/pycore_runtime_init.h
Modules/_threadmodule.c
Modules/posixmodule.c
Python/ceval_gil.c
Python/pylifecycle.c
Python/pystate.c

index 0117c23f518cdb6b8dd57f23811333af1ebbe47d..81944a50c47f39118c779d77822efcd3980b65d1 100644 (file)
@@ -114,11 +114,7 @@ struct _ts {
     PyThreadState *next;
     PyInterpreterState *interp;
 
-    /* Has been initialized to a safe state.
-
-       In order to be effective, this must be set to 0 during or right
-       after allocation. */
-    int _initialized;
+    int _status;
 
     int py_recursion_remaining;
     int py_recursion_limit;
index 370e4cbd59f97640c539dd7ac60a60f6dd491a42..2d431befd74f994fd80c36817349f04bde7d9b08 100644 (file)
@@ -69,7 +69,7 @@ extern void _PyThread_FiniType(PyInterpreterState *interp);
 extern void _Py_Deepfreeze_Fini(void);
 extern void _PyArg_Fini(void);
 
-extern PyStatus _PyGILState_Init(_PyRuntimeState *runtime);
+extern PyStatus _PyGILState_Init(PyInterpreterState *interp);
 extern PyStatus _PyGILState_SetTstate(PyThreadState *tstate);
 extern void _PyGILState_Fini(PyInterpreterState *interp);
 
index 736c2b30d8e097414e9e4a4d4ef960fe2b6129e3..0e46693c1f12834a7f99a939f5669eadc7d69111 100644 (file)
@@ -67,12 +67,12 @@ _Py_ThreadCanHandlePendingCalls(void)
 static inline PyThreadState*
 _PyRuntimeState_GetThreadState(_PyRuntimeState *runtime)
 {
-    return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->gilstate.tstate_current);
+    return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
 }
 
 /* Get the current Python thread state.
 
-   Efficient macro reading directly the 'gilstate.tstate_current' atomic
+   Efficient macro reading directly the 'tstate_current' atomic
    variable. The macro is unsafe: it does not check for error and it can
    return NULL.
 
@@ -120,7 +120,7 @@ static inline PyInterpreterState* _PyInterpreterState_GET(void) {
 
 // PyThreadState functions
 
-PyAPI_FUNC(void) _PyThreadState_SetCurrent(PyThreadState *tstate);
+PyAPI_FUNC(void) _PyThreadState_Bind(PyThreadState *tstate);
 // We keep this around exclusively for stable ABI compatibility.
 PyAPI_FUNC(void) _PyThreadState_Init(
     PyThreadState *tstate);
@@ -139,17 +139,28 @@ _PyThreadState_UpdateTracingState(PyThreadState *tstate)
 }
 
 
+/* PyThreadState status */
+
+#define PyThreadState_UNINITIALIZED 0
+/* Has been initialized to a safe state.
+
+   In order to be effective, this must be set to 0 during or right
+   after allocation. */
+#define PyThreadState_INITIALIZED 1
+#define PyThreadState_BOUND 2
+#define PyThreadState_UNBOUND 3
+
+
 /* Other */
 
 PyAPI_FUNC(PyThreadState *) _PyThreadState_Swap(
-    struct _gilstate_runtime_state *gilstate,
+    _PyRuntimeState *runtime,
     PyThreadState *newts);
 
 PyAPI_FUNC(PyStatus) _PyInterpreterState_Enable(_PyRuntimeState *runtime);
 
 #ifdef HAVE_FORK
 extern PyStatus _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime);
-extern PyStatus _PyGILState_Reinit(_PyRuntimeState *runtime);
 extern void _PySignal_AfterFork(void);
 #endif
 
index d100e836c7d15386ee29666d2353a0d37a33c252..41e1b2cffb81470da04ada631d407ae9fc02915e 100644 (file)
@@ -41,15 +41,11 @@ struct _gilstate_runtime_state {
     /* bpo-26558: Flag to disable PyGILState_Check().
        If set to non-zero, PyGILState_Check() always return 1. */
     int check_enabled;
-    /* Assuming the current thread holds the GIL, this is the
-       PyThreadState for the current thread. */
-    _Py_atomic_address tstate_current;
     /* The single PyInterpreterState used by this process'
        GILState implementation
     */
     /* TODO: Given interp_main, it may be possible to kill this ref */
     PyInterpreterState *autoInterpreterState;
-    Py_tss_t autoTSSkey;
 };
 
 /* Runtime audit hook state */
@@ -124,6 +120,12 @@ typedef struct pyruntimestate {
 
     unsigned long main_thread;
 
+    /* Assuming the current thread holds the GIL, this is the
+       PyThreadState for the current thread. */
+    _Py_atomic_address tstate_current;
+    /* Used for the thread state bound to the current thread. */
+    Py_tss_t autoTSSkey;
+
     PyWideStringList orig_argv;
 
     struct _parser_runtime_state parser;
index cb3fce3732c79b636db308471dfc17ca3cc77c9e..f10dccc015801002cffe59bdfb1b8e7f11a7d06d 100644 (file)
@@ -33,6 +33,9 @@ extern "C" {
               until _PyInterpreterState_Enable() is called. */ \
             .next_id = -1, \
         }, \
+        /* A TSS key must be initialized with Py_tss_NEEDS_INIT \
+           in accordance with the specification. */ \
+        .autoTSSkey = Py_tss_NEEDS_INIT, \
         .parser = _parser_runtime_state_INIT, \
         .imports = { \
             .lock = { \
@@ -49,9 +52,6 @@ extern "C" {
         }, \
         .gilstate = { \
             .check_enabled = 1, \
-            /* A TSS key must be initialized with Py_tss_NEEDS_INIT \
-               in accordance with the specification. */ \
-            .autoTSSkey = Py_tss_NEEDS_INIT, \
         }, \
         .dtoa = _dtoa_runtime_state_INIT(runtime), \
         .fileutils = { \
index 80f467a8e21d66a112bacb81462ad6197793c651..bf4b6ec00e3e30a21c7faf54926ce82836c99f4a 100644 (file)
@@ -1074,13 +1074,7 @@ thread_run(void *boot_raw)
     PyThreadState *tstate;
 
     tstate = boot->tstate;
-    tstate->thread_id = PyThread_get_thread_ident();
-#ifdef PY_HAVE_THREAD_NATIVE_ID
-    tstate->native_thread_id = PyThread_get_thread_native_id();
-#else
-    tstate->native_thread_id = 0;
-#endif
-    _PyThreadState_SetCurrent(tstate);
+    _PyThreadState_Bind(tstate);
     PyEval_AcquireThread(tstate);
     tstate->interp->threads.count++;
 
index 0d4c179368ceac15f4100e30d89f74e164af8bb0..b84fb0d280f4e3195600fc30aeaa0aebff54f235 100644 (file)
@@ -587,7 +587,7 @@ PyOS_AfterFork_Child(void)
     PyStatus status;
     _PyRuntimeState *runtime = &_PyRuntime;
 
-    status = _PyGILState_Reinit(runtime);
+    status = _PyRuntimeState_ReInitThreads(runtime);
     if (_PyStatus_EXCEPTION(status)) {
         goto fatal_error;
     }
@@ -611,11 +611,6 @@ PyOS_AfterFork_Child(void)
 
     _PySignal_AfterFork();
 
-    status = _PyRuntimeState_ReInitThreads(runtime);
-    if (_PyStatus_EXCEPTION(status)) {
-        goto fatal_error;
-    }
-
     status = _PyInterpreterState_DeleteExceptMain(runtime);
     if (_PyStatus_EXCEPTION(status)) {
         goto fatal_error;
index 83f4e91e54573a487568fd0de23eac79673dddac..73d412ba4d71c967e3ddf377b59067acc0f64656 100644 (file)
@@ -581,8 +581,7 @@ PyEval_AcquireThread(PyThreadState *tstate)
 
     take_gil(tstate);
 
-    struct _gilstate_runtime_state *gilstate = &tstate->interp->runtime->gilstate;
-    if (_PyThreadState_Swap(gilstate, tstate) != NULL) {
+    if (_PyThreadState_Swap(tstate->interp->runtime, tstate) != NULL) {
         Py_FatalError("non-NULL old thread state");
     }
 }
@@ -593,7 +592,7 @@ PyEval_ReleaseThread(PyThreadState *tstate)
     assert(is_tstate_valid(tstate));
 
     _PyRuntimeState *runtime = tstate->interp->runtime;
-    PyThreadState *new_tstate = _PyThreadState_Swap(&runtime->gilstate, NULL);
+    PyThreadState *new_tstate = _PyThreadState_Swap(runtime, NULL);
     if (new_tstate != tstate) {
         Py_FatalError("wrong thread state");
     }
@@ -643,7 +642,7 @@ PyThreadState *
 PyEval_SaveThread(void)
 {
     _PyRuntimeState *runtime = &_PyRuntime;
-    PyThreadState *tstate = _PyThreadState_Swap(&runtime->gilstate, NULL);
+    PyThreadState *tstate = _PyThreadState_Swap(runtime, NULL);
     _Py_EnsureTstateNotNULL(tstate);
 
     struct _ceval_runtime_state *ceval = &runtime->ceval;
@@ -660,8 +659,7 @@ PyEval_RestoreThread(PyThreadState *tstate)
 
     take_gil(tstate);
 
-    struct _gilstate_runtime_state *gilstate = &tstate->interp->runtime->gilstate;
-    _PyThreadState_Swap(gilstate, tstate);
+    _PyThreadState_Swap(tstate->interp->runtime, tstate);
 }
 
 
@@ -965,7 +963,7 @@ _Py_HandlePending(PyThreadState *tstate)
     /* GIL drop request */
     if (_Py_atomic_load_relaxed_int32(&interp_ceval_state->gil_drop_request)) {
         /* Give another thread a chance */
-        if (_PyThreadState_Swap(&runtime->gilstate, NULL) != tstate) {
+        if (_PyThreadState_Swap(runtime, NULL) != tstate) {
             Py_FatalError("tstate mix-up");
         }
         drop_gil(ceval, interp_ceval_state, tstate);
@@ -974,7 +972,7 @@ _Py_HandlePending(PyThreadState *tstate)
 
         take_gil(tstate);
 
-        if (_PyThreadState_Swap(&runtime->gilstate, tstate) != NULL) {
+        if (_PyThreadState_Swap(runtime, tstate) != NULL) {
             Py_FatalError("orphan tstate");
         }
     }
index 1cb0e4d747e10accd55b549fb4b503b88892fc0d..5ef2d3f6aa72b078707a85bf8fb3c775f162c262 100644 (file)
@@ -675,12 +675,7 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
                           const PyConfig *src_config,
                           PyThreadState **tstate_p)
 {
-    /* Auto-thread-state API */
-    PyStatus status = _PyGILState_Init(runtime);
-    if (_PyStatus_EXCEPTION(status)) {
-        return status;
-    }
-
+    PyStatus status;
     PyInterpreterState *interp = PyInterpreterState_New();
     if (interp == NULL) {
         return _PyStatus_ERR("can't make main interpreter");
@@ -692,6 +687,12 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
         return status;
     }
 
+    /* Auto-thread-state API */
+    status = _PyGILState_Init(interp);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+
     const _PyInterpreterConfig config = _PyInterpreterConfig_LEGACY_INIT;
     init_interp_settings(interp, &config);
 
@@ -1795,10 +1796,8 @@ finalize_interp_clear(PyThreadState *tstate)
 static void
 finalize_interp_delete(PyInterpreterState *interp)
 {
-    if (_Py_IsMainInterpreter(interp)) {
-        /* Cleanup auto-thread-state */
-        _PyGILState_Fini(interp);
-    }
+    /* Cleanup auto-thread-state */
+    _PyGILState_Fini(interp);
 
     /* We can't call _PyEval_FiniGIL() here because destroying the GIL lock can
        fail when it is being awaited by another running daemon thread (see
index bf9b8db9a7ed460496e7dff73cac57593037d89a..756c1c73330d09e2f59aeefd88964905ff5b18c6 100644 (file)
@@ -10,7 +10,7 @@
 #include "pycore_pyerrors.h"
 #include "pycore_pylifecycle.h"
 #include "pycore_pymem.h"         // _PyMem_SetDefaultAllocator()
-#include "pycore_pystate.h"       // _PyThreadState_GET()
+#include "pycore_pystate.h"
 #include "pycore_runtime_init.h"  // _PyRuntimeState_INIT
 #include "pycore_sysmodule.h"
 
@@ -37,16 +37,205 @@ to avoid the expense of doing their own locking).
 extern "C" {
 #endif
 
-#define _PyRuntimeGILState_GetThreadState(gilstate) \
-    ((PyThreadState*)_Py_atomic_load_relaxed(&(gilstate)->tstate_current))
-#define _PyRuntimeGILState_SetThreadState(gilstate, value) \
-    _Py_atomic_store_relaxed(&(gilstate)->tstate_current, \
-                             (uintptr_t)(value))
-
 /* Forward declarations */
-static PyThreadState *_PyGILState_GetThisThreadState(struct _gilstate_runtime_state *gilstate);
 static void _PyThreadState_Delete(PyThreadState *tstate, int check_current);
 
+
+/* the current thread state */
+
+static inline PyThreadState *
+current_fast_get(_PyRuntimeState *runtime)
+{
+    // The GIL must be held by the current thread.
+    return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
+}
+
+static inline void
+current_fast_set(_PyRuntimeState *runtime, PyThreadState *tstate)
+{
+    // The GIL must be held by the current thread.
+    assert(tstate != NULL);
+    _Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)tstate);
+}
+
+static inline void
+current_fast_clear(_PyRuntimeState *runtime)
+{
+    // The GIL must be held by the current thread.
+    _Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)NULL);
+}
+
+
+static int
+current_tss_initialized(_PyRuntimeState *runtime)
+{
+    return PyThread_tss_is_created(&runtime->autoTSSkey);
+}
+
+static PyStatus
+current_tss_init(_PyRuntimeState *runtime)
+{
+    assert(!current_tss_initialized(runtime));
+    if (PyThread_tss_create(&runtime->autoTSSkey) != 0) {
+        return _PyStatus_NO_MEMORY();
+    }
+    return _PyStatus_OK();
+}
+
+static void
+current_tss_fini(_PyRuntimeState *runtime)
+{
+    assert(current_tss_initialized(runtime));
+    PyThread_tss_delete(&runtime->autoTSSkey);
+}
+
+static inline PyThreadState *
+current_tss_get(_PyRuntimeState *runtime)
+{
+    assert(current_tss_initialized(runtime));
+    return (PyThreadState *)PyThread_tss_get(&runtime->autoTSSkey);
+}
+
+static inline int
+_current_tss_set(_PyRuntimeState *runtime, PyThreadState *tstate)
+{
+    assert(tstate != NULL);
+    assert(current_tss_initialized(runtime));
+    return PyThread_tss_set(&runtime->autoTSSkey, (void *)tstate);
+}
+static inline void
+current_tss_set(_PyRuntimeState *runtime, PyThreadState *tstate)
+{
+    if (_current_tss_set(runtime, tstate) != 0) {
+        Py_FatalError("failed to set current tstate (TSS)");
+    }
+}
+
+static inline void
+current_tss_clear(_PyRuntimeState *runtime)
+{
+    assert(current_tss_initialized(runtime));
+    if (PyThread_tss_set(&runtime->autoTSSkey, NULL) != 0) {
+        Py_FatalError("failed to clear current tstate (TSS)");
+    }
+}
+
+#ifdef HAVE_FORK
+/* Reset the TSS key - called by PyOS_AfterFork_Child().
+ * This should not be necessary, but some - buggy - pthread implementations
+ * don't reset TSS upon fork(), see issue #10517.
+ */
+static PyStatus
+current_tss_reinit(_PyRuntimeState *runtime)
+{
+    if (!current_tss_initialized(runtime)) {
+        return _PyStatus_OK();
+    }
+    PyThreadState *tstate = current_tss_get(runtime);
+
+    current_tss_fini(runtime);
+    PyStatus status = current_tss_init(runtime);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+
+    /* If the thread had an associated auto thread state, reassociate it with
+     * the new key. */
+    if (tstate && _current_tss_set(runtime, tstate) != 0) {
+        return _PyStatus_ERR("failed to set autoTSSkey");
+    }
+    return _PyStatus_OK();
+}
+#endif
+
+static void
+bind_tstate(PyThreadState *tstate)
+{
+    assert(tstate != NULL);
+    assert(tstate->_status == PyThreadState_INITIALIZED);
+    assert(tstate->thread_id == 0);
+    assert(tstate->native_thread_id == 0);
+    _PyRuntimeState *runtime = tstate->interp->runtime;
+
+    /* Stick the thread state for this thread in thread specific storage.
+
+       The only situation where you can legitimately have more than one
+       thread state for an OS level thread is when there are multiple
+       interpreters.
+
+       You shouldn't really be using the PyGILState_ APIs anyway (see issues
+       #10915 and #15751).
+
+       The first thread state created for that given OS level thread will
+       "win", which seems reasonable behaviour.
+    */
+    /* When a thread state is created for a thread by some mechanism
+       other than PyGILState_Ensure(), it's important that the GILState
+       machinery knows about it so it doesn't try to create another
+       thread state for the thread.
+       (This is a better fix for SF bug #1010677 than the first one attempted.)
+    */
+    // XXX Skipping like this does not play nice with multiple interpreters.
+    if (current_tss_get(runtime) == NULL) {
+        current_tss_set(runtime, tstate);
+    }
+
+    tstate->thread_id = PyThread_get_thread_ident();
+#ifdef PY_HAVE_THREAD_NATIVE_ID
+    tstate->native_thread_id = PyThread_get_thread_native_id();
+#endif
+
+    tstate->_status = PyThreadState_BOUND;
+}
+
+static void
+unbind_tstate(PyThreadState *tstate)
+{
+    assert(tstate != NULL);
+    assert(tstate->_status == PyThreadState_BOUND);
+    assert(tstate->thread_id > 0);
+#ifdef PY_HAVE_THREAD_NATIVE_ID
+    assert(tstate->native_thread_id > 0);
+#endif
+    _PyRuntimeState *runtime = tstate->interp->runtime;
+
+    if (current_tss_initialized(runtime) &&
+        tstate == current_tss_get(runtime))
+    {
+        current_tss_clear(runtime);
+    }
+
+    // We leave thread_id and native_thraed_id alone
+    // since they can be useful for debugging.
+    // Check the `_status` field to know if these values
+    // are still valid.
+
+    tstate->_status = PyThreadState_UNBOUND;
+}
+
+/* This is not exported, as it is not reliable!  It can only
+   ever be compared to the state for the *current* thread.
+   * If not equal, then it doesn't matter that the actual
+     value may change immediately after comparison, as it can't
+     possibly change to the current thread's state.
+   * If equal, then the current thread holds the lock, so the value can't
+     change until we yield the lock.
+*/
+static int
+holds_gil(PyThreadState *tstate)
+{
+    // XXX Fall back to tstate->interp->runtime->ceval.gil.last_holder
+    // (and tstate->interp->runtime->ceval.gil.locked).
+    assert(tstate != NULL);
+    _PyRuntimeState *runtime = tstate->interp->runtime;
+    /* Must be the tstate for this thread */
+    assert(tstate == current_tss_get(runtime));
+    return tstate == current_fast_get(runtime);
+}
+
+
+/* the global runtime */
+
 /* Suppress deprecation warning for PyBytesObject.ob_shash */
 _Py_COMP_DIAG_PUSH
 _Py_COMP_DIAG_IGNORE_DEPR_DECLS
@@ -166,6 +355,13 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
         // Reset to _PyRuntimeState_INIT.
         memcpy(runtime, &initial, sizeof(*runtime));
     }
+
+    PyStatus status = current_tss_init(runtime);
+    if (_PyStatus_EXCEPTION(status)) {
+        _PyRuntimeState_Fini(runtime);
+        return status;
+    }
+
     init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head,
                  unicode_next_index, lock1, lock2, lock3, lock4);
 
@@ -175,6 +371,10 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
 void
 _PyRuntimeState_Fini(_PyRuntimeState *runtime)
 {
+    if (current_tss_initialized(runtime)) {
+        current_tss_fini(runtime);
+    }
+
     /* Force the allocator used by _PyRuntimeState_Init(). */
     PyMemAllocatorEx old_alloc;
     _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
@@ -227,15 +427,17 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
         return _PyStatus_ERR("Failed to reinitialize runtime locks");
 
     }
+
+    PyStatus status = current_tss_reinit(runtime);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+
     return _PyStatus_OK();
 }
 #endif
 
 
-/* Forward declaration */
-static void _PyGILState_NoteThreadState(
-    struct _gilstate_runtime_state *gilstate, PyThreadState* tstate);
-
 PyStatus
 _PyInterpreterState_Enable(_PyRuntimeState *runtime)
 {
@@ -318,7 +520,8 @@ PyInterpreterState *
 PyInterpreterState_New(void)
 {
     PyInterpreterState *interp;
-    PyThreadState *tstate = _PyThreadState_GET();
+    _PyRuntimeState *runtime = &_PyRuntime;
+    PyThreadState *tstate = current_fast_get(runtime);
 
     /* tstate is NULL when Py_InitializeFromConfig() calls
        PyInterpreterState_New() to create the main interpreter. */
@@ -335,7 +538,6 @@ PyInterpreterState_New(void)
     }
 
     /* Don't get runtime from tstate since tstate can be NULL. */
-    _PyRuntimeState *runtime = &_PyRuntime;
     struct pyinterpreters *interpreters = &runtime->interpreters;
 
     /* We completely serialize creation of multiple interpreters, since
@@ -483,7 +685,7 @@ PyInterpreterState_Clear(PyInterpreterState *interp)
     // Use the current Python thread state to call audit hooks and to collect
     // garbage. It can be different than the current Python thread state
     // of 'interp'.
-    PyThreadState *current_tstate = _PyThreadState_GET();
+    PyThreadState *current_tstate = current_fast_get(interp->runtime);
 
     interpreter_clear(interp, current_tstate);
 }
@@ -518,7 +720,7 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
     _PyEval_FiniState(&interp->ceval);
 
     /* Delete current thread. After this, many C API calls become crashy. */
-    _PyThreadState_Swap(&runtime->gilstate, NULL);
+    _PyThreadState_Swap(runtime, NULL);
 
     HEAD_LOCK(runtime);
     PyInterpreterState **p;
@@ -558,10 +760,9 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
 PyStatus
 _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime)
 {
-    struct _gilstate_runtime_state *gilstate = &runtime->gilstate;
     struct pyinterpreters *interpreters = &runtime->interpreters;
 
-    PyThreadState *tstate = _PyThreadState_Swap(gilstate, NULL);
+    PyThreadState *tstate = _PyThreadState_Swap(runtime, NULL);
     if (tstate != NULL && tstate->interp != interpreters->main) {
         return _PyStatus_ERR("not main interpreter");
     }
@@ -591,7 +792,7 @@ _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime)
     if (interpreters->head == NULL) {
         return _PyStatus_ERR("missing main interpreter");
     }
-    _PyThreadState_Swap(gilstate, tstate);
+    _PyThreadState_Swap(runtime, tstate);
     return _PyStatus_OK();
 }
 #endif
@@ -600,7 +801,7 @@ _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime)
 PyInterpreterState *
 PyInterpreterState_Get(void)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
+    PyThreadState *tstate = current_fast_get(&_PyRuntime);
     _Py_EnsureTstateNotNULL(tstate);
     PyInterpreterState *interp = tstate->interp;
     if (interp == NULL) {
@@ -691,8 +892,8 @@ void
 _PyInterpreterState_IDDecref(PyInterpreterState *interp)
 {
     assert(interp->id_mutex != NULL);
+    _PyRuntimeState *runtime = interp->runtime;
 
-    struct _gilstate_runtime_state *gilstate = &_PyRuntime.gilstate;
     PyThread_acquire_lock(interp->id_mutex, WAIT_LOCK);
     assert(interp->id_refcount != 0);
     interp->id_refcount -= 1;
@@ -703,9 +904,9 @@ _PyInterpreterState_IDDecref(PyInterpreterState *interp)
         // XXX Using the "head" thread isn't strictly correct.
         PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
         // XXX Possible GILState issues?
-        PyThreadState *save_tstate = _PyThreadState_Swap(gilstate, tstate);
+        PyThreadState *save_tstate = _PyThreadState_Swap(runtime, tstate);
         Py_EndInterpreter(tstate);
-        _PyThreadState_Swap(gilstate, save_tstate);
+        _PyThreadState_Swap(runtime, save_tstate);
     }
 }
 
@@ -790,7 +991,7 @@ init_threadstate(PyThreadState *tstate,
                  PyInterpreterState *interp, uint64_t id,
                  PyThreadState *next)
 {
-    if (tstate->_initialized) {
+    if (tstate->_status != PyThreadState_UNINITIALIZED) {
         Py_FatalError("thread state already initialized");
     }
 
@@ -809,10 +1010,7 @@ init_threadstate(PyThreadState *tstate,
     tstate->next = next;
     assert(tstate->prev == NULL);
 
-    tstate->thread_id = PyThread_get_thread_ident();
-#ifdef PY_HAVE_THREAD_NATIVE_ID
-    tstate->native_thread_id = PyThread_get_thread_native_id();
-#endif
+    // thread_id and native_thread_id are set in bind_tstate().
 
     tstate->py_recursion_limit = interp->ceval.recursion_limit,
     tstate->py_recursion_remaining = interp->ceval.recursion_limit,
@@ -820,12 +1018,16 @@ init_threadstate(PyThreadState *tstate,
 
     tstate->exc_info = &tstate->exc_state;
 
+    // PyGILState_Release must not try to delete this thread state.
+    // This is cleared when PyGILState_Ensure() creates the thread sate.
+    tstate->gilstate_counter = 1;
+
     tstate->cframe = &tstate->root_cframe;
     tstate->datastack_chunk = NULL;
     tstate->datastack_top = NULL;
     tstate->datastack_limit = NULL;
 
-    tstate->_initialized = 1;
+    tstate->_status = PyThreadState_INITIALIZED;
 }
 
 static PyThreadState *
@@ -884,11 +1086,12 @@ PyThreadState_New(PyInterpreterState *interp)
 {
     PyThreadState *tstate = new_threadstate(interp);
     if (tstate) {
-        _PyThreadState_SetCurrent(tstate);
+        bind_tstate(tstate);
     }
     return tstate;
 }
 
+// This must be followed by a call to _PyThreadState_Bind();
 PyThreadState *
 _PyThreadState_Prealloc(PyInterpreterState *interp)
 {
@@ -904,9 +1107,9 @@ _PyThreadState_Init(PyThreadState *tstate)
 }
 
 void
-_PyThreadState_SetCurrent(PyThreadState *tstate)
+_PyThreadState_Bind(PyThreadState *tstate)
 {
-    _PyGILState_NoteThreadState(&tstate->interp->runtime->gilstate, tstate);
+    bind_tstate(tstate);
 }
 
 PyObject*
@@ -968,7 +1171,7 @@ PyState_AddModule(PyObject* module, PyModuleDef* def)
         return -1;
     }
 
-    PyThreadState *tstate = _PyThreadState_GET();
+    PyThreadState *tstate = current_fast_get(&_PyRuntime);
     PyInterpreterState *interp = tstate->interp;
     Py_ssize_t index = def->m_base.m_index;
     if (interp->modules_by_index &&
@@ -984,7 +1187,7 @@ PyState_AddModule(PyObject* module, PyModuleDef* def)
 int
 PyState_RemoveModule(PyModuleDef* def)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
+    PyThreadState *tstate = current_fast_get(&_PyRuntime);
     PyInterpreterState *interp = tstate->interp;
 
     if (def->m_slots) {
@@ -1089,8 +1292,7 @@ PyThreadState_Clear(PyThreadState *tstate)
 
 /* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */
 static void
-tstate_delete_common(PyThreadState *tstate,
-                     struct _gilstate_runtime_state *gilstate)
+tstate_delete_common(PyThreadState *tstate)
 {
     _Py_EnsureTstateNotNULL(tstate);
     PyInterpreterState *interp = tstate->interp;
@@ -1111,11 +1313,10 @@ tstate_delete_common(PyThreadState *tstate,
     }
     HEAD_UNLOCK(runtime);
 
-    if (gilstate->autoInterpreterState &&
-        PyThread_tss_get(&gilstate->autoTSSkey) == tstate)
-    {
-        PyThread_tss_set(&gilstate->autoTSSkey, NULL);
-    }
+    // XXX Do this in PyThreadState_Swap() (and assert not-equal here)?
+    unbind_tstate(tstate);
+
+    // XXX Move to PyThreadState_Clear()?
     _PyStackChunk *chunk = tstate->datastack_chunk;
     tstate->datastack_chunk = NULL;
     while (chunk != NULL) {
@@ -1128,13 +1329,12 @@ tstate_delete_common(PyThreadState *tstate,
 static void
 _PyThreadState_Delete(PyThreadState *tstate, int check_current)
 {
-    struct _gilstate_runtime_state *gilstate = &tstate->interp->runtime->gilstate;
     if (check_current) {
-        if (tstate == _PyRuntimeGILState_GetThreadState(gilstate)) {
+        if (tstate == current_fast_get(tstate->interp->runtime)) {
             _Py_FatalErrorFormat(__func__, "tstate %p is still current", tstate);
         }
     }
-    tstate_delete_common(tstate, gilstate);
+    tstate_delete_common(tstate);
     free_threadstate(tstate);
 }
 
@@ -1150,9 +1350,8 @@ void
 _PyThreadState_DeleteCurrent(PyThreadState *tstate)
 {
     _Py_EnsureTstateNotNULL(tstate);
-    struct _gilstate_runtime_state *gilstate = &tstate->interp->runtime->gilstate;
-    tstate_delete_common(tstate, gilstate);
-    _PyRuntimeGILState_SetThreadState(gilstate, NULL);
+    tstate_delete_common(tstate);
+    current_fast_clear(tstate->interp->runtime);
     _PyEval_ReleaseLock(tstate);
     free_threadstate(tstate);
 }
@@ -1160,8 +1359,7 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate)
 void
 PyThreadState_DeleteCurrent(void)
 {
-    struct _gilstate_runtime_state *gilstate = &_PyRuntime.gilstate;
-    PyThreadState *tstate = _PyRuntimeGILState_GetThreadState(gilstate);
+    PyThreadState *tstate = current_fast_get(&_PyRuntime);
     _PyThreadState_DeleteCurrent(tstate);
 }
 
@@ -1211,38 +1409,45 @@ _PyThreadState_DeleteExcept(_PyRuntimeState *runtime, PyThreadState *tstate)
 PyThreadState *
 _PyThreadState_UncheckedGet(void)
 {
-    return _PyThreadState_GET();
+    return current_fast_get(&_PyRuntime);
 }
 
 
 PyThreadState *
 PyThreadState_Get(void)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
+    PyThreadState *tstate = current_fast_get(&_PyRuntime);
     _Py_EnsureTstateNotNULL(tstate);
     return tstate;
 }
 
 
 PyThreadState *
-_PyThreadState_Swap(struct _gilstate_runtime_state *gilstate, PyThreadState *newts)
+_PyThreadState_Swap(_PyRuntimeState *runtime, PyThreadState *newts)
 {
-    PyThreadState *oldts = _PyRuntimeGILState_GetThreadState(gilstate);
+    PyThreadState *oldts = current_fast_get(runtime);
 
-    _PyRuntimeGILState_SetThreadState(gilstate, newts);
+    if (newts == NULL) {
+        current_fast_clear(runtime);
+    }
+    else {
+        current_fast_set(runtime, newts);
+    }
     /* It should not be possible for more than one thread state
        to be used for a thread.  Check this the best we can in debug
        builds.
     */
+    // XXX The above isn't true when multiple interpreters are involved.
 #if defined(Py_DEBUG)
-    if (newts) {
+    if (newts && current_tss_initialized(runtime)) {
         /* This can be called from PyEval_RestoreThread(). Similar
            to it, we need to ensure errno doesn't change.
         */
         int err = errno;
-        PyThreadState *check = _PyGILState_GetThisThreadState(gilstate);
-        if (check && check->interp == newts->interp && check != newts)
+        PyThreadState *check = current_tss_get(runtime);
+        if (check && check->interp == newts->interp && check != newts) {
             Py_FatalError("Invalid thread state for this thread");
+        }
         errno = err;
     }
 #endif
@@ -1252,7 +1457,7 @@ _PyThreadState_Swap(struct _gilstate_runtime_state *gilstate, PyThreadState *new
 PyThreadState *
 PyThreadState_Swap(PyThreadState *newts)
 {
-    return _PyThreadState_Swap(&_PyRuntime.gilstate, newts);
+    return _PyThreadState_Swap(&_PyRuntime, newts);
 }
 
 /* An extension mechanism to store arbitrary additional per-thread state.
@@ -1278,7 +1483,7 @@ _PyThreadState_GetDict(PyThreadState *tstate)
 PyObject *
 PyThreadState_GetDict(void)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
+    PyThreadState *tstate = current_fast_get(&_PyRuntime);
     if (tstate == NULL) {
         return NULL;
     }
@@ -1401,7 +1606,8 @@ PyThreadState_Next(PyThreadState *tstate) {
 PyObject *
 _PyThread_CurrentFrames(void)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
+    _PyRuntimeState *runtime = &_PyRuntime;
+    PyThreadState *tstate = current_fast_get(runtime);
     if (_PySys_Audit(tstate, "sys._current_frames", NULL) < 0) {
         return NULL;
     }
@@ -1417,7 +1623,6 @@ _PyThread_CurrentFrames(void)
      * Because these lists can mutate even when the GIL is held, we
      * need to grab head_mutex for the duration.
      */
-    _PyRuntimeState *runtime = tstate->interp->runtime;
     HEAD_LOCK(runtime);
     PyInterpreterState *i;
     for (i = runtime->interpreters.head; i != NULL; i = i->next) {
@@ -1457,7 +1662,8 @@ done:
 PyObject *
 _PyThread_CurrentExceptions(void)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
+    _PyRuntimeState *runtime = &_PyRuntime;
+    PyThreadState *tstate = current_fast_get(runtime);
 
     _Py_EnsureTstateNotNULL(tstate);
 
@@ -1476,7 +1682,6 @@ _PyThread_CurrentExceptions(void)
      * Because these lists can mutate even when the GIL is held, we
      * need to grab head_mutex for the duration.
      */
-    _PyRuntimeState *runtime = tstate->interp->runtime;
     HEAD_LOCK(runtime);
     PyInterpreterState *i;
     for (i = runtime->interpreters.head; i != NULL; i = i->next) {
@@ -1515,40 +1720,37 @@ done:
 
 /* Python "auto thread state" API. */
 
-/* Keep this as a static, as it is not reliable!  It can only
-   ever be compared to the state for the *current* thread.
-   * If not equal, then it doesn't matter that the actual
-     value may change immediately after comparison, as it can't
-     possibly change to the current thread's state.
-   * If equal, then the current thread holds the lock, so the value can't
-     change until we yield the lock.
-*/
-static int
-PyThreadState_IsCurrent(PyThreadState *tstate)
-{
-    /* Must be the tstate for this thread */
-    struct _gilstate_runtime_state *gilstate = &_PyRuntime.gilstate;
-    assert(_PyGILState_GetThisThreadState(gilstate) == tstate);
-    return tstate == _PyRuntimeGILState_GetThreadState(gilstate);
-}
-
 /* Internal initialization/finalization functions called by
    Py_Initialize/Py_FinalizeEx
 */
 PyStatus
-_PyGILState_Init(_PyRuntimeState *runtime)
+_PyGILState_Init(PyInterpreterState *interp)
 {
-    struct _gilstate_runtime_state *gilstate = &runtime->gilstate;
-    if (PyThread_tss_create(&gilstate->autoTSSkey) != 0) {
-        return _PyStatus_NO_MEMORY();
+    if (!_Py_IsMainInterpreter(interp)) {
+        /* Currently, PyGILState is shared by all interpreters. The main
+         * interpreter is responsible to initialize it. */
+        return _PyStatus_OK();
     }
-    // PyThreadState_New() calls _PyGILState_NoteThreadState() which does
-    // nothing before autoInterpreterState is set.
-    assert(gilstate->autoInterpreterState == NULL);
+    _PyRuntimeState *runtime = interp->runtime;
+    assert(current_tss_get(runtime) == NULL);
+    assert(runtime->gilstate.autoInterpreterState == NULL);
+    runtime->gilstate.autoInterpreterState = interp;
     return _PyStatus_OK();
 }
 
+void
+_PyGILState_Fini(PyInterpreterState *interp)
+{
+    if (!_Py_IsMainInterpreter(interp)) {
+        /* Currently, PyGILState is shared by all interpreters. The main
+         * interpreter is responsible to initialize it. */
+        return;
+    }
+    interp->runtime->gilstate.autoInterpreterState = NULL;
+}
 
+
+// XXX Drop this.
 PyStatus
 _PyGILState_SetTstate(PyThreadState *tstate)
 {
@@ -1561,14 +1763,12 @@ _PyGILState_SetTstate(PyThreadState *tstate)
          * interpreter is responsible to initialize it. */
         return _PyStatus_OK();
     }
+    _PyRuntimeState *runtime = tstate->interp->runtime;
 
-    struct _gilstate_runtime_state *gilstate = &tstate->interp->runtime->gilstate;
-
-    gilstate->autoInterpreterState = tstate->interp;
-    assert(PyThread_tss_get(&gilstate->autoTSSkey) == NULL);
-    assert(tstate->gilstate_counter == 0);
+    assert(runtime->gilstate.autoInterpreterState == tstate->interp);
+    assert(current_tss_get(runtime) == tstate);
+    assert(tstate->gilstate_counter == 1);
 
-    _PyGILState_NoteThreadState(gilstate, tstate);
     return _PyStatus_OK();
 }
 
@@ -1578,118 +1778,42 @@ _PyGILState_GetInterpreterStateUnsafe(void)
     return _PyRuntime.gilstate.autoInterpreterState;
 }
 
-void
-_PyGILState_Fini(PyInterpreterState *interp)
-{
-    struct _gilstate_runtime_state *gilstate = &interp->runtime->gilstate;
-    PyThread_tss_delete(&gilstate->autoTSSkey);
-    gilstate->autoInterpreterState = NULL;
-}
-
-#ifdef HAVE_FORK
-/* Reset the TSS key - called by PyOS_AfterFork_Child().
- * This should not be necessary, but some - buggy - pthread implementations
- * don't reset TSS upon fork(), see issue #10517.
- */
-PyStatus
-_PyGILState_Reinit(_PyRuntimeState *runtime)
-{
-    struct _gilstate_runtime_state *gilstate = &runtime->gilstate;
-    PyThreadState *tstate = _PyGILState_GetThisThreadState(gilstate);
-
-    PyThread_tss_delete(&gilstate->autoTSSkey);
-    if (PyThread_tss_create(&gilstate->autoTSSkey) != 0) {
-        return _PyStatus_NO_MEMORY();
-    }
-
-    /* If the thread had an associated auto thread state, reassociate it with
-     * the new key. */
-    if (tstate &&
-        PyThread_tss_set(&gilstate->autoTSSkey, (void *)tstate) != 0)
-    {
-        return _PyStatus_ERR("failed to set autoTSSkey");
-    }
-    return _PyStatus_OK();
-}
-#endif
-
-/* When a thread state is created for a thread by some mechanism other than
-   PyGILState_Ensure, it's important that the GILState machinery knows about
-   it so it doesn't try to create another thread state for the thread (this is
-   a better fix for SF bug #1010677 than the first one attempted).
-*/
-static void
-_PyGILState_NoteThreadState(struct _gilstate_runtime_state *gilstate, PyThreadState* tstate)
-{
-    /* If autoTSSkey isn't initialized, this must be the very first
-       threadstate created in Py_Initialize().  Don't do anything for now
-       (we'll be back here when _PyGILState_Init is called). */
-    if (!gilstate->autoInterpreterState) {
-        return;
-    }
-
-    /* Stick the thread state for this thread in thread specific storage.
-
-       The only situation where you can legitimately have more than one
-       thread state for an OS level thread is when there are multiple
-       interpreters.
-
-       You shouldn't really be using the PyGILState_ APIs anyway (see issues
-       #10915 and #15751).
-
-       The first thread state created for that given OS level thread will
-       "win", which seems reasonable behaviour.
-    */
-    if (PyThread_tss_get(&gilstate->autoTSSkey) == NULL) {
-        if ((PyThread_tss_set(&gilstate->autoTSSkey, (void *)tstate)) != 0) {
-            Py_FatalError("Couldn't create autoTSSkey mapping");
-        }
-    }
-
-    /* PyGILState_Release must not try to delete this thread state. */
-    tstate->gilstate_counter = 1;
-}
-
 /* The public functions */
-static PyThreadState *
-_PyGILState_GetThisThreadState(struct _gilstate_runtime_state *gilstate)
-{
-    if (gilstate->autoInterpreterState == NULL)
-        return NULL;
-    return (PyThreadState *)PyThread_tss_get(&gilstate->autoTSSkey);
-}
 
 PyThreadState *
 PyGILState_GetThisThreadState(void)
 {
-    return _PyGILState_GetThisThreadState(&_PyRuntime.gilstate);
+    _PyRuntimeState *runtime = &_PyRuntime;
+    if (!current_tss_initialized(runtime)) {
+        return NULL;
+    }
+    return current_tss_get(runtime);
 }
 
 int
 PyGILState_Check(void)
 {
-    struct _gilstate_runtime_state *gilstate = &_PyRuntime.gilstate;
-    if (!gilstate->check_enabled) {
+    _PyRuntimeState *runtime = &_PyRuntime;
+    if (!runtime->gilstate.check_enabled) {
         return 1;
     }
 
-    if (!PyThread_tss_is_created(&gilstate->autoTSSkey)) {
+    if (!current_tss_initialized(runtime)) {
         return 1;
     }
 
-    PyThreadState *tstate = _PyRuntimeGILState_GetThreadState(gilstate);
+    PyThreadState *tstate = current_fast_get(runtime);
     if (tstate == NULL) {
         return 0;
     }
 
-    return (tstate == _PyGILState_GetThisThreadState(gilstate));
+    return (tstate == current_tss_get(runtime));
 }
 
 PyGILState_STATE
 PyGILState_Ensure(void)
 {
     _PyRuntimeState *runtime = &_PyRuntime;
-    struct _gilstate_runtime_state *gilstate = &runtime->gilstate;
 
     /* Note that we do not auto-init Python here - apart from
        potential races with 2 threads auto-initializing, pep-311
@@ -1699,27 +1823,29 @@ PyGILState_Ensure(void)
     /* Ensure that _PyEval_InitThreads() and _PyGILState_Init() have been
        called by Py_Initialize() */
     assert(_PyEval_ThreadsInitialized(runtime));
-    assert(gilstate->autoInterpreterState);
+    assert(current_tss_initialized(runtime));
+    assert(runtime->gilstate.autoInterpreterState != NULL);
 
-    PyThreadState *tcur = (PyThreadState *)PyThread_tss_get(&gilstate->autoTSSkey);
-    int current;
+    PyThreadState *tcur = current_tss_get(runtime);
+    int has_gil;
     if (tcur == NULL) {
         /* Create a new Python thread state for this thread */
-        tcur = PyThreadState_New(gilstate->autoInterpreterState);
+        tcur = PyThreadState_New(runtime->gilstate.autoInterpreterState);
         if (tcur == NULL) {
             Py_FatalError("Couldn't create thread-state for new thread");
         }
 
         /* This is our thread state!  We'll need to delete it in the
            matching call to PyGILState_Release(). */
+        assert(tcur->gilstate_counter == 1);
         tcur->gilstate_counter = 0;
-        current = 0; /* new thread state is never current */
+        has_gil = 0; /* new thread state is never current */
     }
     else {
-        current = PyThreadState_IsCurrent(tcur);
+        has_gil = holds_gil(tcur);
     }
 
-    if (current == 0) {
+    if (!has_gil) {
         PyEval_RestoreThread(tcur);
     }
 
@@ -1730,14 +1856,14 @@ PyGILState_Ensure(void)
     */
     ++tcur->gilstate_counter;
 
-    return current ? PyGILState_LOCKED : PyGILState_UNLOCKED;
+    return has_gil ? PyGILState_LOCKED : PyGILState_UNLOCKED;
 }
 
 void
 PyGILState_Release(PyGILState_STATE oldstate)
 {
     _PyRuntimeState *runtime = &_PyRuntime;
-    PyThreadState *tstate = PyThread_tss_get(&runtime->gilstate.autoTSSkey);
+    PyThreadState *tstate = current_tss_get(runtime);
     if (tstate == NULL) {
         Py_FatalError("auto-releasing thread-state, "
                       "but no thread-state for this thread");
@@ -1748,12 +1874,12 @@ PyGILState_Release(PyGILState_STATE oldstate)
        but while this is very new (April 2003), the extra check
        by release-only users can't hurt.
     */
-    if (!PyThreadState_IsCurrent(tstate)) {
+    if (!holds_gil(tstate)) {
         _Py_FatalErrorFormat(__func__,
                              "thread state %p must be current when releasing",
                              tstate);
     }
-    assert(PyThreadState_IsCurrent(tstate));
+    assert(holds_gil(tstate));
     --tstate->gilstate_counter;
     assert(tstate->gilstate_counter >= 0); /* illegal counter value */
 
@@ -1769,12 +1895,13 @@ PyGILState_Release(PyGILState_STATE oldstate)
          * races; see bugs 225673 and 1061968 (that nasty bug has a
          * habit of coming back).
          */
-        assert(_PyRuntimeGILState_GetThreadState(&runtime->gilstate) == tstate);
+        assert(current_fast_get(runtime) == tstate);
         _PyThreadState_DeleteCurrent(tstate);
     }
     /* Release the lock if necessary */
-    else if (oldstate == PyGILState_UNLOCKED)
+    else if (oldstate == PyGILState_UNLOCKED) {
         PyEval_SaveThread();
+    }
 }
 
 
@@ -1905,7 +2032,8 @@ _PyObject_CheckCrossInterpreterData(PyObject *obj)
 int
 _PyObject_GetCrossInterpreterData(PyObject *obj, _PyCrossInterpreterData *data)
 {
-    PyThreadState *tstate = _PyThreadState_GET();
+    _PyRuntimeState *runtime = &_PyRuntime;
+    PyThreadState *tstate = current_fast_get(runtime);
 #ifdef Py_DEBUG
     // The caller must hold the GIL
     _Py_EnsureTstateNotNULL(tstate);
@@ -1948,19 +2076,19 @@ _PyCrossInterpreterData_NewObject(_PyCrossInterpreterData *data)
 typedef void (*releasefunc)(PyInterpreterState *, void *);
 
 static void
-_call_in_interpreter(struct _gilstate_runtime_state *gilstate,
-                     PyInterpreterState *interp, releasefunc func, void *arg)
+_call_in_interpreter(PyInterpreterState *interp, releasefunc func, void *arg)
 {
     /* We would use Py_AddPendingCall() if it weren't specific to the
      * main interpreter (see bpo-33608).  In the meantime we take a
      * naive approach.
      */
+    _PyRuntimeState *runtime = interp->runtime;
     PyThreadState *save_tstate = NULL;
-    if (interp != _PyRuntimeGILState_GetThreadState(gilstate)->interp) {
+    if (interp != current_fast_get(runtime)->interp) {
         // XXX Using the "head" thread isn't strictly correct.
         PyThreadState *tstate = PyInterpreterState_ThreadHead(interp);
         // XXX Possible GILState issues?
-        save_tstate = _PyThreadState_Swap(gilstate, tstate);
+        save_tstate = _PyThreadState_Swap(runtime, tstate);
     }
 
     // XXX Once the GIL is per-interpreter, this should be called with the
@@ -1969,7 +2097,7 @@ _call_in_interpreter(struct _gilstate_runtime_state *gilstate,
 
     // Switch back.
     if (save_tstate != NULL) {
-        _PyThreadState_Swap(gilstate, save_tstate);
+        _PyThreadState_Swap(runtime, save_tstate);
     }
 }
 
@@ -1993,8 +2121,7 @@ _PyCrossInterpreterData_Release(_PyCrossInterpreterData *data)
     }
 
     // "Release" the data and/or the object.
-    struct _gilstate_runtime_state *gilstate = &_PyRuntime.gilstate;
-    _call_in_interpreter(gilstate, interp,
+    _call_in_interpreter(interp,
                          (releasefunc)_PyCrossInterpreterData_Clear, data);
     return 0;
 }
@@ -2314,8 +2441,9 @@ _PyInterpreterState_GetConfigCopy(PyConfig *config)
 const PyConfig*
 _Py_GetConfig(void)
 {
+    _PyRuntimeState *runtime = &_PyRuntime;
     assert(PyGILState_Check());
-    PyThreadState *tstate = _PyThreadState_GET();
+    PyThreadState *tstate = current_fast_get(runtime);
     _Py_EnsureTstateNotNULL(tstate);
     return _PyInterpreterState_GetConfig(tstate->interp);
 }