]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-95382: Use cache for indentations in the JSON encoder (GH-118636)
authorSerhiy Storchaka <storchaka@gmail.com>
Tue, 12 Nov 2024 17:19:15 +0000 (19:19 +0200)
committerGitHub <noreply@github.com>
Tue, 12 Nov 2024 17:19:15 +0000 (17:19 +0000)
Modules/_json.c

index ce0093ab431d05befda09e0ada29f1ed0806f66f..a99abbe72bf7a08072ee54cd3e8612c20e933450 100644 (file)
@@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self);
 static int
 encoder_clear(PyEncoderObject *self);
 static int
-encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
+encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache);
 static int
-encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
+encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache);
 static int
-encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
+encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache);
 static PyObject *
 _encoded_const(PyObject *obj);
 static void
@@ -1252,17 +1252,92 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
     return (PyObject *)s;
 }
 
+
+/* indent_cache is a list that contains intermixed values at even and odd
+ * positions:
+ *
+ * 2*k   : '\n' + indent * (k + initial_indent_level)
+ *         strings written after opening and before closing brackets
+ * 2*k-1 : item_separator + '\n' + indent * (k + initial_indent_level)
+ *         strings written between items
+ *
+ * Its size is always an odd number.
+ */
 static PyObject *
-_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
+create_indent_cache(PyEncoderObject *s, Py_ssize_t indent_level)
 {
     PyObject *newline_indent = PyUnicode_FromOrdinal('\n');
     if (newline_indent != NULL && indent_level) {
         PyUnicode_AppendAndDel(&newline_indent,
-                               PySequence_Repeat(indent, indent_level));
+                               PySequence_Repeat(s->indent, indent_level));
+    }
+    if (newline_indent == NULL) {
+        return NULL;
+    }
+    PyObject *indent_cache = PyList_New(1);
+    if (indent_cache == NULL) {
+        Py_DECREF(newline_indent);
+        return NULL;
     }
-    return newline_indent;
+    PyList_SET_ITEM(indent_cache, 0, newline_indent);
+    return indent_cache;
+}
+
+/* Extend indent_cache by adding values for the next level.
+ * It should have values for the indent_level-1 level before the call.
+ */
+static int
+update_indent_cache(PyEncoderObject *s,
+                    Py_ssize_t indent_level, PyObject *indent_cache)
+{
+    assert(indent_level * 2 == PyList_GET_SIZE(indent_cache) + 1);
+    assert(indent_level > 0);
+    PyObject *newline_indent = PyList_GET_ITEM(indent_cache, (indent_level - 1)*2);
+    newline_indent = PyUnicode_Concat(newline_indent, s->indent);
+    if (newline_indent == NULL) {
+        return -1;
+    }
+    PyObject *separator_indent = PyUnicode_Concat(s->item_separator, newline_indent);
+    if (separator_indent == NULL) {
+        Py_DECREF(newline_indent);
+        return -1;
+    }
+
+    if (PyList_Append(indent_cache, separator_indent) < 0 ||
+        PyList_Append(indent_cache, newline_indent) < 0)
+    {
+        Py_DECREF(separator_indent);
+        Py_DECREF(newline_indent);
+        return -1;
+    }
+    Py_DECREF(separator_indent);
+    Py_DECREF(newline_indent);
+    return 0;
 }
 
+static PyObject *
+get_item_separator(PyEncoderObject *s,
+                   Py_ssize_t indent_level, PyObject *indent_cache)
+{
+    assert(indent_level > 0);
+    if (indent_level * 2 > PyList_GET_SIZE(indent_cache)) {
+        if (update_indent_cache(s, indent_level, indent_cache) < 0) {
+            return NULL;
+        }
+    }
+    assert(indent_level * 2 < PyList_GET_SIZE(indent_cache));
+    return PyList_GET_ITEM(indent_cache, indent_level * 2 - 1);
+}
+
+static int
+write_newline_indent(PyUnicodeWriter *writer,
+                     Py_ssize_t indent_level, PyObject *indent_cache)
+{
+    PyObject *newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2);
+    return PyUnicodeWriter_WriteStr(writer, newline_indent);
+}
+
+
 static PyObject *
 encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
 {
@@ -1280,20 +1355,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
         return NULL;
     }
 
-    PyObject *newline_indent = NULL;
+    PyObject *indent_cache = NULL;
     if (self->indent != Py_None) {
-        newline_indent = _create_newline_indent(self->indent, indent_level);
-        if (newline_indent == NULL) {
+        indent_cache = create_indent_cache(self, indent_level);
+        if (indent_cache == NULL) {
             PyUnicodeWriter_Discard(writer);
             return NULL;
         }
     }
-    if (encoder_listencode_obj(self, writer, obj, newline_indent)) {
+    if (encoder_listencode_obj(self, writer, obj, indent_level, indent_cache)) {
         PyUnicodeWriter_Discard(writer);
-        Py_XDECREF(newline_indent);
+        Py_XDECREF(indent_cache);
         return NULL;
     }
-    Py_XDECREF(newline_indent);
+    Py_XDECREF(indent_cache);
 
     PyObject *str = PyUnicodeWriter_Finish(writer);
     if (str == NULL) {
@@ -1381,7 +1456,8 @@ _steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
 
 static int
 encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
-                       PyObject *obj, PyObject *newline_indent)
+                       PyObject *obj,
+                       Py_ssize_t indent_level, PyObject *indent_cache)
 {
     /* Encode Python object obj to a JSON term */
     PyObject *newobj;
@@ -1421,14 +1497,14 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
     else if (PyList_Check(obj) || PyTuple_Check(obj)) {
         if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
             return -1;
-        rv = encoder_listencode_list(s, writer, obj, newline_indent);
+        rv = encoder_listencode_list(s, writer, obj, indent_level, indent_cache);
         _Py_LeaveRecursiveCall();
         return rv;
     }
     else if (PyDict_Check(obj)) {
         if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
             return -1;
-        rv = encoder_listencode_dict(s, writer, obj, newline_indent);
+        rv = encoder_listencode_dict(s, writer, obj, indent_level, indent_cache);
         _Py_LeaveRecursiveCall();
         return rv;
     }
@@ -1462,7 +1538,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
             Py_XDECREF(ident);
             return -1;
         }
-        rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
+        rv = encoder_listencode_obj(s, writer, newobj, indent_level, indent_cache);
         _Py_LeaveRecursiveCall();
 
         Py_DECREF(newobj);
@@ -1485,7 +1561,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
 static int
 encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first,
                          PyObject *dct, PyObject *key, PyObject *value,
-                         PyObject *newline_indent,
+                         Py_ssize_t indent_level, PyObject *indent_cache,
                          PyObject *item_separator)
 {
     PyObject *keystr = NULL;
@@ -1541,7 +1617,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
     if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
         return -1;
     }
-    if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
+    if (encoder_listencode_obj(s, writer, value, indent_level, indent_cache) < 0) {
         _PyErr_FormatNote("when serializing %T item %R", dct, key);
         return -1;
     }
@@ -1550,15 +1626,14 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
 
 static int
 encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
-                        PyObject *dct, PyObject *newline_indent)
+                        PyObject *dct,
+                       Py_ssize_t indent_level, PyObject *indent_cache)
 {
     /* Encode Python dict dct a JSON term */
     PyObject *ident = NULL;
     PyObject *items = NULL;
     PyObject *key, *value;
     bool first = true;
-    PyObject *new_newline_indent = NULL;
-    PyObject *separator_indent = NULL;
 
     if (PyDict_GET_SIZE(dct) == 0) {
         /* Fast path */
@@ -1585,19 +1660,13 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
         goto bail;
     }
 
-    PyObject *current_item_separator = s->item_separator; // borrowed reference
+    PyObject *separator = s->item_separator; // borrowed reference
     if (s->indent != Py_None) {
-        new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
-        if (new_newline_indent == NULL) {
-            goto bail;
-        }
-        separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
-        if (separator_indent == NULL) {
-            goto bail;
-        }
-        // update item separator with a borrowed reference
-        current_item_separator = separator_indent;
-        if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
+        indent_level++;
+        separator = get_item_separator(s, indent_level, indent_cache);
+        if (separator == NULL ||
+            write_newline_indent(writer, indent_level, indent_cache) < 0)
+        {
             goto bail;
         }
     }
@@ -1618,8 +1687,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
             key = PyTuple_GET_ITEM(item, 0);
             value = PyTuple_GET_ITEM(item, 1);
             if (encoder_encode_key_value(s, writer, &first, dct, key, value,
-                                         new_newline_indent,
-                                         current_item_separator) < 0)
+                                         indent_level, indent_cache,
+                                         separator) < 0)
                 goto bail;
         }
         Py_CLEAR(items);
@@ -1628,8 +1697,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
         Py_ssize_t pos = 0;
         while (PyDict_Next(dct, &pos, &key, &value)) {
             if (encoder_encode_key_value(s, writer, &first, dct, key, value,
-                                         new_newline_indent,
-                                         current_item_separator) < 0)
+                                         indent_level, indent_cache,
+                                         separator) < 0)
                 goto bail;
         }
     }
@@ -1640,10 +1709,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
         Py_CLEAR(ident);
     }
     if (s->indent != Py_None) {
-        Py_CLEAR(new_newline_indent);
-        Py_CLEAR(separator_indent);
-
-        if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
+        indent_level--;
+        if (write_newline_indent(writer, indent_level, indent_cache) < 0) {
             goto bail;
         }
     }
@@ -1656,20 +1723,17 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
 bail:
     Py_XDECREF(items);
     Py_XDECREF(ident);
-    Py_XDECREF(separator_indent);
-    Py_XDECREF(new_newline_indent);
     return -1;
 }
 
 static int
 encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
-                        PyObject *seq, PyObject *newline_indent)
+                        PyObject *seq,
+                        Py_ssize_t indent_level, PyObject *indent_cache)
 {
     PyObject *ident = NULL;
     PyObject *s_fast = NULL;
     Py_ssize_t i;
-    PyObject *new_newline_indent = NULL;
-    PyObject *separator_indent = NULL;
 
     ident = NULL;
     s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
@@ -1702,20 +1766,13 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
 
     PyObject *separator = s->item_separator; // borrowed reference
     if (s->indent != Py_None) {
-        new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
-        if (new_newline_indent == NULL) {
-            goto bail;
-        }
-
-        if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
-            goto bail;
-        }
-
-        separator_indent = PyUnicode_Concat(separator, new_newline_indent);
-        if (separator_indent == NULL) {
+        indent_level++;
+        separator = get_item_separator(s, indent_level, indent_cache);
+        if (separator == NULL ||
+            write_newline_indent(writer, indent_level, indent_cache) < 0)
+        {
             goto bail;
         }
-        separator = separator_indent; // assign separator with borrowed reference
     }
     for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
@@ -1723,7 +1780,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
             if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
                 goto bail;
         }
-        if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
+        if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
             _PyErr_FormatNote("when serializing %T item %zd", seq, i);
             goto bail;
         }
@@ -1735,9 +1792,8 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
     }
 
     if (s->indent != Py_None) {
-        Py_CLEAR(new_newline_indent);
-        Py_CLEAR(separator_indent);
-        if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
+        indent_level--;
+        if (write_newline_indent(writer, indent_level, indent_cache) < 0) {
             goto bail;
         }
     }
@@ -1751,8 +1807,6 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
 bail:
     Py_XDECREF(ident);
     Py_DECREF(s_fast);
-    Py_XDECREF(separator_indent);
-    Py_XDECREF(new_newline_indent);
     return -1;
 }