git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-150942: Speed up json.loads array and object decoding (GH-150945)
author Pieter Eendebak <pieter.eendebak@gmail.com>
Thu, 11 Jun 2026 16:38:49 +0000 (18:38 +0200)
committer GitHub <noreply@github.com>
Thu, 11 Jun 2026 16:38:49 +0000 (17:38 +0100)
Append parsed values to the result list with _PyList_AppendTakeRef and
insert key/value pairs with _PyDict_SetItem_Take2, which take ownership of
the references instead of incref-ing on insert and then decref-ing the
local.  This removes a reference-count round-trip per element (and, on the
free-threaded build, a per-append lock).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Misc/NEWS.d/next/Library/2026-05-31-10-00-00.gh-issue-150942.Tk9aRef.rst [new file with mode: 0644]
Modules/_json.c

diff --git a/Misc/NEWS.d/next/Library/2026-05-31-10-00-00.gh-issue-150942.Tk9aRef.rst b/Misc/NEWS.d/next/Library/2026-05-31-10-00-00.gh-issue-150942.Tk9aRef.rst
new file mode 100644 (file)
index 0000000..ba0de8f
--- /dev/null
@@ -0,0 +1,3 @@
+Speed up :func:`json.loads` decoding of arrays and objects by storing
+parsed values into the result list/dict without an extra reference-count
+round-trip (using the internal reference-stealing append/insert helpers).
index 6c4f38834631d30e36929e8fdcf207bb48e22ed8..b057b56b2f9f8d93234614f409efa6b8bbf7c71e 100644 (file)
@@ -11,6 +11,8 @@
 #include "Python.h"
 #include "pycore_ceval.h"         // _Py_EnterRecursiveCall()
 #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST()
+#include "pycore_dict.h"          // _PyDict_SetItem_Take2()
+#include "pycore_list.h"          // _PyList_AppendTakeRef()
 #include "pycore_global_strings.h" // _Py_ID()
 #include "pycore_pyerrors.h"      // _PyErr_FormatNote
 #include "pycore_runtime.h"       // _PyRuntime
@@ -752,7 +754,6 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss
     const void *str;
     int kind;
     Py_ssize_t end_idx;
-    PyObject *val = NULL;
     PyObject *rval = NULL;
     PyObject *key = NULL;
     int has_pairs_hook = (s->object_pairs_hook != Py_None);
@@ -802,13 +803,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss
             while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
 
             /* read any JSON term */
-            val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
+            PyObject *val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
             if (val == NULL)
                 goto bail;
 
+            /* The steal below takes our references to both key and val
+               (releasing them on failure).  Only key is reset for the bail
+               path; val is never live there, so it needs no cleanup. */
             if (has_pairs_hook) {
                 PyObject *item = _PyTuple_FromPairSteal(key, val);
-                key = val = NULL;
+                key = NULL;
                 if (item == NULL)
                     goto bail;
                 if (PyList_Append(rval, item) == -1) {
@@ -818,10 +822,10 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss
                 Py_DECREF(item);
             }
             else {
-                if (PyDict_SetItem(rval, key, val) < 0)
+                int err = _PyDict_SetItem_Take2((PyDictObject *)rval, key, val);
+                key = NULL;
+                if (err < 0)
                     goto bail;
-                Py_CLEAR(key);
-                Py_CLEAR(val);
             }
             idx = next_idx;
 
@@ -851,21 +855,20 @@ _parse_object_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ss
     *next_idx_ptr = idx + 1;
 
     if (has_pairs_hook) {
-        val = PyObject_CallOneArg(s->object_pairs_hook, rval);
+        PyObject *res = PyObject_CallOneArg(s->object_pairs_hook, rval);
         Py_DECREF(rval);
-        return val;
+        return res;
     }
 
     /* if object_hook is not None: rval = object_hook(rval) */
     if (s->object_hook != Py_None) {
-        val = PyObject_CallOneArg(s->object_hook, rval);
+        PyObject *res = PyObject_CallOneArg(s->object_hook, rval);
         Py_DECREF(rval);
-        return val;
+        return res;
     }
     return rval;
 bail:
     Py_XDECREF(key);
-    Py_XDECREF(val);
     Py_XDECREF(rval);
     return NULL;
 }
@@ -882,7 +885,6 @@ _parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssi
     const void *str;
     int kind;
     Py_ssize_t end_idx;
-    PyObject *val = NULL;
     PyObject *rval;
     Py_ssize_t next_idx;
     Py_ssize_t comma_idx;
@@ -903,14 +905,12 @@ _parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssi
         while (1) {
 
             /* read any JSON term  */
-            val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
+            PyObject *val = scan_once_unicode(s, memo, pystr, idx, &next_idx);
             if (val == NULL)
                 goto bail;
 
-            if (PyList_Append(rval, val) == -1)
+            if (_PyList_AppendTakeRef((PyListObject *)rval, val) < 0)
                 goto bail;
-
-            Py_CLEAR(val);
             idx = next_idx;
 
             /* skip whitespace between term and , */
@@ -944,13 +944,12 @@ _parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssi
     *next_idx_ptr = idx + 1;
     /* if array_hook is not None: return array_hook(rval) */
     if (!Py_IsNone(s->array_hook)) {
-        val = PyObject_CallOneArg(s->array_hook, rval);
+        PyObject *res = PyObject_CallOneArg(s->array_hook, rval);
         Py_DECREF(rval);
-        return val;
+        return res;
     }
     return rval;
 bail:
-    Py_XDECREF(val);
     Py_DECREF(rval);
     return NULL;
 }