git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-148072: Cache pickle.dumps/loads per interpreter in XIData (GH-148125)
author Junya Fukuda <junya.fukuda.e@gmail.com>
Mon, 6 Apr 2026 15:37:02 +0000 (00:37 +0900)
committer GitHub <noreply@github.com>
Mon, 6 Apr 2026 15:37:02 +0000 (11:37 -0400)
Store references to pickle.dumps and pickle.loads in _PyXI_state_t
so they are looked up only once per interpreter lifetime, avoiding
repeated PyImport_ImportModuleAttrString calls on every cross-interpreter
data transfer via pickle fallback.

Benchmarks show 1.7x-3.3x speedup for InterpreterPoolExecutor
when transferring mutable types (list, dict) through XIData.

Include/internal/pycore_crossinterp.h
Misc/NEWS.d/next/Core_and_Builtins/2026-04-05-00-00-00.gh-issue-148072.xid9Pe.rst [new file with mode: 0644]
Python/crossinterp.c

index 81faffac1941719cfccd5c44f0b8b4fae81f45fe..bed966681fa1f0c3df6d1e7d7a315880f8a0b5a6 100644 (file)
@@ -265,6 +265,12 @@ typedef struct {
         // heap types
         PyObject *PyExc_NotShareableError;
     } exceptions;
+
+    // Cached references to pickle.dumps/loads (per-interpreter).
+    struct {
+        PyObject *dumps;
+        PyObject *loads;
+    } pickle;
 } _PyXI_state_t;
 
 #define _PyXI_GET_GLOBAL_STATE(interp) (&(interp)->runtime->xi)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-05-00-00-00.gh-issue-148072.xid9Pe.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-05-00-00-00.gh-issue-148072.xid9Pe.rst
new file mode 100644 (file)
index 0000000..17c6f88
--- /dev/null
@@ -0,0 +1,4 @@
+Cache ``pickle.dumps`` and ``pickle.loads`` per interpreter in the XIData
+framework, avoiding repeated module lookups on every cross-interpreter data
+transfer.  This speeds up :class:`~concurrent.futures.InterpreterPoolExecutor`
+for mutable types (``list``, ``dict``) by 1.7x--3.3x.
index f92927da47532128879f9228d7525fc0ab43cfc4..4cd4b32ef906bb3de95ab63b32e945537d654f32 100644 (file)
@@ -568,6 +568,48 @@ _PyObject_GetXIData(PyThreadState *tstate,
 
 /* pickle C-API */
 
+/* Per-interpreter cache for pickle.dumps and pickle.loads.
+ *
+ * Each interpreter has its own cache in _PyXI_state_t.pickle, preserving
+ * interpreter isolation.  The cache is populated lazily on first use and
+ * cleared during interpreter finalization in _Py_xi_state_fini().
+ *
+ * Note: the cached references are captured at first use and not invalidated
+ * on module reload.  This matches the caching pattern used elsewhere in
+ * CPython (e.g. arraymodule.c, _decimal.c). */
+
+/* Return the cached pickle.dumps callable for tstate's interpreter.
+ *
+ * On first use the attribute is imported and stored in
+ * _PyXI_state_t.pickle.dumps, which then owns one strong reference
+ * (released by Py_CLEAR() in _Py_xi_state_fini()).
+ *
+ * Returns a borrowed reference, or NULL with an exception set if the
+ * import fails. */
+static PyObject *
+_get_pickle_dumps(PyThreadState *tstate)
+{
+    _PyXI_state_t *state = _PyXI_GET_STATE(tstate->interp);
+    PyObject *dumps = _Py_atomic_load_ptr(&state->pickle.dumps);
+    if (dumps != NULL) {
+        return dumps;
+    }
+    dumps = PyImport_ImportModuleAttrString("pickle", "dumps");
+    if (dumps == NULL) {
+        return NULL;
+    }
+    // The import can run arbitrary Python code (and so switch threads),
+    // and free-threaded builds have no GIL at all, so another thread of
+    // this interpreter may have filled the cache in the meantime.
+    // Publish only if the slot is still empty; otherwise drop our
+    // reference instead of overwriting (and leaking) the one the cache
+    // already owns.
+    PyObject *expected = NULL;
+    if (!_Py_atomic_compare_exchange_ptr(&state->pickle.dumps,
+                                         &expected, dumps)) {
+        // Lost the race: 'expected' now holds the cached callable.
+        Py_DECREF(dumps);
+        return expected;
+    }
+    return dumps;  // the cache now owns the reference
+}
+
+/* Return the cached pickle.loads callable for tstate's interpreter.
+ *
+ * On first use the attribute is imported and stored in
+ * _PyXI_state_t.pickle.loads, which then owns one strong reference
+ * (released by Py_CLEAR() in _Py_xi_state_fini()).
+ *
+ * Returns a borrowed reference, or NULL with an exception set if the
+ * import fails. */
+static PyObject *
+_get_pickle_loads(PyThreadState *tstate)
+{
+    _PyXI_state_t *state = _PyXI_GET_STATE(tstate->interp);
+    PyObject *loads = _Py_atomic_load_ptr(&state->pickle.loads);
+    if (loads != NULL) {
+        return loads;
+    }
+    loads = PyImport_ImportModuleAttrString("pickle", "loads");
+    if (loads == NULL) {
+        return NULL;
+    }
+    // The import can run arbitrary Python code (and so switch threads),
+    // and free-threaded builds have no GIL at all, so another thread of
+    // this interpreter may have filled the cache in the meantime.
+    // Publish only if the slot is still empty; otherwise drop our
+    // reference instead of overwriting (and leaking) the one the cache
+    // already owns.
+    PyObject *expected = NULL;
+    if (!_Py_atomic_compare_exchange_ptr(&state->pickle.loads,
+                                         &expected, loads)) {
+        // Lost the race: 'expected' now holds the cached callable.
+        Py_DECREF(loads);
+        return expected;
+    }
+    return loads;  // the cache now owns the reference
+}
+
 struct _pickle_context {
     PyThreadState *tstate;
 };
@@ -575,13 +617,12 @@ struct _pickle_context {
 static PyObject *
 _PyPickle_Dumps(struct _pickle_context *ctx, PyObject *obj)
 {
-    PyObject *dumps = PyImport_ImportModuleAttrString("pickle", "dumps");
+    PyObject *dumps = _get_pickle_dumps(ctx->tstate);
     if (dumps == NULL) {
         return NULL;
     }
-    PyObject *bytes = PyObject_CallOneArg(dumps, obj);
-    Py_DECREF(dumps);
-    return bytes;
+    // dumps is a borrowed reference from the cache.
+    return PyObject_CallOneArg(dumps, obj);
 }
 
 
@@ -636,7 +677,8 @@ _PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled)
     PyThreadState *tstate = ctx->tstate;
 
     PyObject *exc = NULL;
-    PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads");
+    // loads is a borrowed reference from the per-interpreter cache.
+    PyObject *loads = _get_pickle_loads(tstate);
     if (loads == NULL) {
         return NULL;
     }
@@ -682,7 +724,6 @@ finally:
         // It might make sense to chain it (__context__).
         _PyErr_SetRaisedException(tstate, exc);
     }
-    Py_DECREF(loads);
     return obj;
 }
 
@@ -3094,6 +3135,10 @@ _Py_xi_state_init(_PyXI_state_t *state, PyInterpreterState *interp)
     assert(state != NULL);
     assert(interp == NULL || state == _PyXI_GET_STATE(interp));
 
+    // Initialize pickle function cache (before any fallible ops).
+    state->pickle.dumps = NULL;
+    state->pickle.loads = NULL;
+
     xid_lookup_init(&state->data_lookup);
 
     // Initialize exceptions.
@@ -3116,6 +3161,11 @@ _Py_xi_state_fini(_PyXI_state_t *state, PyInterpreterState *interp)
     assert(state != NULL);
     assert(interp == NULL || state == _PyXI_GET_STATE(interp));
 
+    // Clear pickle function cache first: the cached functions may hold
+    // references to modules cleaned up by later finalization steps.
+    Py_CLEAR(state->pickle.dumps);
+    Py_CLEAR(state->pickle.loads);
+
     fini_heap_exctypes(&state->exceptions);
     if (interp != NULL) {
         fini_static_exctypes(&state->exceptions, interp);