]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-125196: Use PyUnicodeWriter for JSON encoder (#125249)
authorVictor Stinner <vstinner@python.org>
Thu, 10 Oct 2024 13:33:00 +0000 (15:33 +0200)
committerGitHub <noreply@github.com>
Thu, 10 Oct 2024 13:33:00 +0000 (15:33 +0200)
Replace the private _PyUnicodeWriter with the public PyUnicodeWriter.

Modules/_json.c

index 9e29de0f22465f0e459a2889446fbaab5739d453..ce0093ab431d05befda09e0ada29f1ed0806f66f 100644 (file)
@@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self);
 static int
 encoder_clear(PyEncoderObject *self);
 static int
-encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
+encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
 static int
-encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
+encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
 static int
-encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
+encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
 static PyObject *
 _encoded_const(PyObject *obj);
 static void
@@ -1268,38 +1268,39 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
 {
     /* Python callable interface to encode_listencode_obj */
     static char *kwlist[] = {"obj", "_current_indent_level", NULL};
-    PyObject *obj, *result;
+    PyObject *obj;
     Py_ssize_t indent_level;
-    _PyUnicodeWriter writer;
 
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
-        &obj, &indent_level))
+                                     &obj, &indent_level))
         return NULL;
 
-    _PyUnicodeWriter_Init(&writer);
-    writer.overallocate = 1;
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
+    if (writer == NULL) {
+        return NULL;
+    }
 
     PyObject *newline_indent = NULL;
     if (self->indent != Py_None) {
         newline_indent = _create_newline_indent(self->indent, indent_level);
         if (newline_indent == NULL) {
-            _PyUnicodeWriter_Dealloc(&writer);
+            PyUnicodeWriter_Discard(writer);
             return NULL;
         }
     }
-    if (encoder_listencode_obj(self, &writer, obj, newline_indent)) {
-        _PyUnicodeWriter_Dealloc(&writer);
+    if (encoder_listencode_obj(self, writer, obj, newline_indent)) {
+        PyUnicodeWriter_Discard(writer);
         Py_XDECREF(newline_indent);
         return NULL;
     }
     Py_XDECREF(newline_indent);
 
-    result = PyTuple_New(1);
-    if (result == NULL ||
-            PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) {
-        Py_XDECREF(result);
+    PyObject *str = PyUnicodeWriter_Finish(writer);
+    if (str == NULL) {
         return NULL;
     }
+    PyObject *result = PyTuple_Pack(1, str);
+    Py_DECREF(str);
     return result;
 }
 
@@ -1370,16 +1371,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
 }
 
 static int
-_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
+_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
 {
     /* Append stolen and then decrement its reference count */
-    int rval = _PyUnicodeWriter_WriteStr(writer, stolen);
+    int rval = PyUnicodeWriter_WriteStr(writer, stolen);
     Py_DECREF(stolen);
     return rval;
 }
 
 static int
-encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
+encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
                        PyObject *obj, PyObject *newline_indent)
 {
     /* Encode Python object obj to a JSON term */
@@ -1387,13 +1388,13 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
     int rv;
 
     if (obj == Py_None) {
-      return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4);
+      return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
     }
     else if (obj == Py_True) {
-      return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4);
+      return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
     }
     else if (obj == Py_False) {
-      return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5);
+      return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
     }
     else if (PyUnicode_Check(obj)) {
         PyObject *encoded = encoder_encode_string(s, obj);
@@ -1402,6 +1403,10 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
         return _steal_accumulate(writer, encoded);
     }
     else if (PyLong_Check(obj)) {
+        if (PyLong_CheckExact(obj)) {
+            // Fast-path for exact integers
+            return PyUnicodeWriter_WriteRepr(writer, obj);
+        }
         PyObject *encoded = PyLong_Type.tp_repr(obj);
         if (encoded == NULL)
             return -1;
@@ -1478,7 +1483,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
 }
 
 static int
-encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
+encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first,
                          PyObject *dct, PyObject *key, PyObject *value,
                          PyObject *newline_indent,
                          PyObject *item_separator)
@@ -1518,7 +1523,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
         *first = false;
     }
     else {
-        if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
+        if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
             Py_DECREF(keystr);
             return -1;
         }
@@ -1533,7 +1538,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
     if (_steal_accumulate(writer, encoded) < 0) {
         return -1;
     }
-    if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
+    if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
         return -1;
     }
     if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
@@ -1544,7 +1549,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
 }
 
 static int
-encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
+encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
                         PyObject *dct, PyObject *newline_indent)
 {
     /* Encode Python dict dct a JSON term */
@@ -1555,8 +1560,10 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
     PyObject *new_newline_indent = NULL;
     PyObject *separator_indent = NULL;
 
-    if (PyDict_GET_SIZE(dct) == 0)  /* Fast path */
-        return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
+    if (PyDict_GET_SIZE(dct) == 0) {
+        /* Fast path */
+        return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
+    }
 
     if (s->markers != Py_None) {
         int has_key;
@@ -1574,8 +1581,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
         }
     }
 
-    if (_PyUnicodeWriter_WriteChar(writer, '{'))
+    if (PyUnicodeWriter_WriteChar(writer, '{')) {
         goto bail;
+    }
 
     PyObject *current_item_separator = s->item_separator; // borrowed reference
     if (s->indent != Py_None) {
@@ -1589,7 +1597,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
         }
         // update item separator with a borrowed reference
         current_item_separator = separator_indent;
-        if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
+        if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
             goto bail;
         }
     }
@@ -1635,13 +1643,14 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
         Py_CLEAR(new_newline_indent);
         Py_CLEAR(separator_indent);
 
-        if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
+        if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
             goto bail;
         }
     }
 
-    if (_PyUnicodeWriter_WriteChar(writer, '}'))
+    if (PyUnicodeWriter_WriteChar(writer, '}')) {
         goto bail;
+    }
     return 0;
 
 bail:
@@ -1653,7 +1662,7 @@ bail:
 }
 
 static int
-encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
+encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
                         PyObject *seq, PyObject *newline_indent)
 {
     PyObject *ident = NULL;
@@ -1668,7 +1677,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
         return -1;
     if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
         Py_DECREF(s_fast);
-        return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2);
+        return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
     }
 
     if (s->markers != Py_None) {
@@ -1687,8 +1696,9 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
         }
     }
 
-    if (_PyUnicodeWriter_WriteChar(writer, '['))
+    if (PyUnicodeWriter_WriteChar(writer, '[')) {
         goto bail;
+    }
 
     PyObject *separator = s->item_separator; // borrowed reference
     if (s->indent != Py_None) {
@@ -1697,7 +1707,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
             goto bail;
         }
 
-        if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
+        if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
             goto bail;
         }
 
@@ -1710,7 +1720,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
     for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
         if (i) {
-            if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
+            if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
                 goto bail;
         }
         if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
@@ -1727,13 +1737,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
     if (s->indent != Py_None) {
         Py_CLEAR(new_newline_indent);
         Py_CLEAR(separator_indent);
-        if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
+        if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
             goto bail;
         }
     }
 
-    if (_PyUnicodeWriter_WriteChar(writer, ']'))
+    if (PyUnicodeWriter_WriteChar(writer, ']')) {
         goto bail;
+    }
     Py_DECREF(s_fast);
     return 0;