See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
+.. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size)
+
+ Write the ASCII string *str* into *writer*.
+
+ *size* is the string length in bytes. If *size* is equal to ``-1``, call
+ ``strlen(str)`` to get the string length.
+
+ *str* must only contain ASCII characters. The behavior is undefined if
+ *str* contains non-ASCII characters.
+
+ On success, return ``0``.
+ On error, set an exception, leave the writer unchanged, and return ``-1``.
+
+ .. versionadded:: next
+
.. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size)
- Writer the wide string *str* into *writer*.
+ Write the wide string *str* into *writer*.
*size* is a number of wide characters. If *size* is equal to ``-1``, call
``wcslen(str)`` to get the string length.
functions as replacements for :c:func:`PySys_GetObject`.
(Contributed by Serhiy Storchaka in :gh:`108512`.)
+* Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string
+ into a :c:type:`PyUnicodeWriter`. The function is faster than
+ :c:func:`PyUnicodeWriter_WriteUTF8`, but has undefined behavior if the
+ input string contains non-ASCII characters.
+ (Contributed by Victor Stinner in :gh:`133968`.)
+
+
Porting to Python 3.15
----------------------
PyUnicodeWriter *writer,
const char *str,
Py_ssize_t size);
+PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII(
+ PyUnicodeWriter *writer,
+ const char *str,
+ Py_ssize_t size);
PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
PyUnicodeWriter *writer,
const wchar_t *str,
self.assertEqual(writer.finish(),
"ascii-latin1=\xE9-euro=\u20AC.")
+ def test_ascii(self):  # write_ascii() with size -1, size 0 and a size shorter than the input
+ writer = self.create_writer(0)
+ writer.write_ascii(b"Hello ", -1)  # size -1: the whole bytes string is written
+ writer.write_ascii(b"", 0)  # empty write: contributes nothing to the result
+ writer.write_ascii(b"Python! <truncated>", 6)  # only the first 6 bytes ("Python") are written
+ self.assertEqual(writer.finish(), "Hello Python")
+
def test_invalid_utf8(self):
writer = self.create_writer(0)
with self.assertRaises(UnicodeDecodeError):
import ctypes
import unittest
-import warnings
from ctypes import Structure, POINTER, pointer, c_char_p
# String-based "incomplete pointers" were implemented in ctypes 0.6.3 (2003, when
--- /dev/null
+Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string
+into a :c:type:`PyUnicodeWriter`. The function is faster than
+:c:func:`PyUnicodeWriter_WriteUTF8`, but has undefined behavior if the
+input string contains non-ASCII characters. Patch by Victor Stinner.
int rv;
if (obj == Py_None) {
- return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
+ return PyUnicodeWriter_WriteASCII(writer, "null", 4);
}
else if (obj == Py_True) {
- return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
+ return PyUnicodeWriter_WriteASCII(writer, "true", 4);
}
else if (obj == Py_False) {
- return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
+ return PyUnicodeWriter_WriteASCII(writer, "false", 5);
}
else if (PyUnicode_Check(obj)) {
PyObject *encoded = encoder_encode_string(s, obj);
if (PyDict_GET_SIZE(dct) == 0) {
/* Fast path */
- return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
+ return PyUnicodeWriter_WriteASCII(writer, "{}", 2);
}
if (s->markers != Py_None) {
return -1;
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Py_DECREF(s_fast);
- return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
+ return PyUnicodeWriter_WriteASCII(writer, "[]", 2);
}
if (s->markers != Py_None) {
goto fail;
}
}
- if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) {
goto fail;
}
}
}
+static PyObject*
+writer_write_ascii(PyObject *self_raw, PyObject *args)  // registered as "write_ascii" in writer_methods
+{
+ WriterObject *self = (WriterObject *)self_raw;
+ if (writer_check(self) < 0) {  // bail out if writer_check() (defined elsewhere) rejects the writer
+ return NULL;
+ }
+
+ char *str;
+ Py_ssize_t size;
+ if (!PyArg_ParseTuple(args, "yn", &str, &size)) {  // "y": bytes-like object as a C string, "n": Py_ssize_t
+ return NULL;
+ }
+
+ if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) {  // a negative result is reported to Python as NULL
+ return NULL;
+ }
+ Py_RETURN_NONE;  // success: the Python-level method returns None
+}
+
+
static PyObject*
writer_write_widechar(PyObject *self_raw, PyObject *args)
{
static PyMethodDef writer_methods[] = {
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
+ {"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS},
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
{"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
for (Py_ssize_t i = 0; i < len; i++) {
if (i > 0) {
- if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
return -1;
}
}
}
for (Py_ssize_t i = 0; i < len; i++) {
if (i > 0) {
- if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
goto error;
}
}
}
if (len == 0) {
// for something like tuple[()] we should print a "()"
- if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) {
goto error;
}
}
for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) {
PyObject *item = PyTuple_GET_ITEM(value, i);
if (i > 0) {
- if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
PyUnicodeWriter_Discard(writer);
return NULL;
}
}
if (p == (PyObject *)&_PyNone_Type) {
- return PyUnicodeWriter_WriteUTF8(writer, "None", 4);
+ return PyUnicodeWriter_WriteASCII(writer, "None", 4);
}
if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 &&
return 0;
}
+
+int
+PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,  // public entry point; thin wrapper over the private writer API
+ const char *str,  // per the C API docs: must contain only ASCII characters
+ Py_ssize_t size)  // length in bytes; per the C API docs, -1 means "use strlen(str)"
+{
+ assert(writer != NULL);  // a NULL writer is a caller bug; checked only when assertions are enabled
+ _Py_AssertHoldsTstate();  // NOTE(review): presumably verifies the caller holds a thread state -- confirm
+
+ _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;  // view the public handle as the private writer type
+ return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);  // the helper's result is returned unchanged
+}
+
+
int
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
const char *str,
}
for (Py_ssize_t i = 0; i < len; i++) {
- if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) {
+ if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) {
goto error;
}
PyObject *p = PyTuple_GET_ITEM(alias->args, i);
}
#if 0
- PyUnicodeWriter_WriteUTF8(writer, "|args=", 6);
+ PyUnicodeWriter_WriteASCII(writer, "|args=", 6);
PyUnicodeWriter_WriteRepr(writer, alias->args);
- PyUnicodeWriter_WriteUTF8(writer, "|h=", 3);
+ PyUnicodeWriter_WriteASCII(writer, "|h=", 3);
PyUnicodeWriter_WriteRepr(writer, alias->hashable_args);
if (alias->unhashable_args) {
- PyUnicodeWriter_WriteUTF8(writer, "|u=", 3);
+ PyUnicodeWriter_WriteASCII(writer, "|u=", 3);
PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args);
}
#endif
for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) {
if (i > 0) {
- if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
goto error;
}
}
}
if (i == 0 && length > 2) {
- if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) {
goto error;
}
}
}
if (i > 0) {
- if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
Py_DECREF(name);
Py_DECREF(value_repr);
goto error;
for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) {
if (i > 0) {
- if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
goto error;
}
}
}
if (i == 0 && length > 2) {
- if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) {
goto error;
}
}
}
if (i > 0) {
- if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
Py_DECREF(name);
Py_DECREF(value_repr);
goto error;
return NULL;
}
- if (PyUnicodeWriter_WriteUTF8(writer, "<ContextVar name=", 17) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, "<ContextVar name=", 17) < 0) {
goto error;
}
if (PyUnicodeWriter_WriteRepr(writer, self->var_name) < 0) {
}
if (self->var_default != NULL) {
- if (PyUnicodeWriter_WriteUTF8(writer, " default=", 9) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, " default=", 9) < 0) {
goto error;
}
if (PyUnicodeWriter_WriteRepr(writer, self->var_default) < 0) {
if (writer == NULL) {
return NULL;
}
- if (PyUnicodeWriter_WriteUTF8(writer, "<Token", 6) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, "<Token", 6) < 0) {
goto error;
}
if (self->tok_used) {
- if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) {
goto error;
}
}
- if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) {
goto error;
}
if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) {
}
if (key_or_null == NULL) {
- if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, "NULL:\n", 6) < 0) {
goto error;
}
}
}
- if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
goto error;
}
}
goto error;
}
- if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
+ if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
goto error;
}
}