On success, return ``0``.
On error, set an exception, leave the writer unchanged, and return ``-1``.
+.. c:function:: int PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *writer, Py_UCS4 *str, Py_ssize_t size)
+
+ Write the UCS4 string *str* into *writer*.
+
+ *size* is the number of UCS4 characters to write.
+
+ On success, return ``0``.
+ On error, set an exception, leave the writer unchanged, and return ``-1``.
+
.. c:function:: int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
Call :c:func:`PyObject_Str` on *obj* and write the output into *writer*.
* :c:func:`PyUnicodeWriter_Finish`.
* :c:func:`PyUnicodeWriter_WriteChar`.
* :c:func:`PyUnicodeWriter_WriteUTF8`.
+ * :c:func:`PyUnicodeWriter_WriteUCS4`.
* :c:func:`PyUnicodeWriter_WriteWideChar`.
* :c:func:`PyUnicodeWriter_WriteStr`.
* :c:func:`PyUnicodeWriter_WriteRepr`.
PyUnicodeWriter *writer,
const wchar_t *str,
Py_ssize_t size);
+PyAPI_FUNC(int) PyUnicodeWriter_WriteUCS4(  /* returns 0 on success, -1 with an exception set on error */
+ PyUnicodeWriter *writer,  /* destination writer */
+ Py_UCS4 *str,  /* UCS4 code points to append */
+ Py_ssize_t size);  /* number of code points to take from str; a negative value is rejected with ValueError */
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
PyUnicodeWriter *writer,
writer.write_widechar("latin1=\xE9")
writer.write_widechar("-")
writer.write_widechar("euro=\u20AC")
+ writer.write_char("-")
+ writer.write_widechar("max=\U0010ffff")
writer.write_char('.')
- self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.")
+ self.assertEqual(writer.finish(),
+ "latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
+
+ def test_ucs4(self):  # exercises write_ucs4() with ASCII, Latin-1, BMP and non-BMP text, special code points and a negative size
+ writer = self.create_writer(0)
+ writer.write_ucs4("ascii IGNORED", 5)  # size 5 is shorter than the text: only the leading 5 code points are expected in the result
+ writer.write_char("-")
+ writer.write_ucs4("latin1=\xe9", 8)  # size equals the full length (8 code points)
+ writer.write_char("-")
+ writer.write_ucs4("euro=\u20ac", 6)  # size equals the full length (6 code points)
+ writer.write_char("-")
+ writer.write_ucs4("max=\U0010ffff", 5)  # size equals the full length; last code point is U+10FFFF
+ writer.write_char(".")
+ self.assertEqual(writer.finish(),
+ "ascii-latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
+
+ # Test some special characters
+ writer = self.create_writer(0)
+ # Lone surrogate character
+ writer.write_ucs4("lone\uDC80", 5)  # the lone surrogate is expected to be written unchanged
+ writer.write_char("-")
+ # Surrogate pair
+ writer.write_ucs4("pair\uDBFF\uDFFF", 5)  # text has 6 code points; size 5 drops the trailing low surrogate
+ writer.write_char("-")
+ writer.write_ucs4("null[\0]", 7)  # the embedded NUL is counted by size and must not end the write early
+ self.assertEqual(writer.finish(),
+ "lone\udc80-pair\udbff-null[\0]")
+
+ # invalid size
+ writer = self.create_writer(0)
+ with self.assertRaises(ValueError):
+ writer.write_ucs4("text", -1)  # a negative size must be rejected
+
@unittest.skipIf(ctypes is None, 'need ctypes')
* :c:func:`PyUnicodeWriter_Finish`.
* :c:func:`PyUnicodeWriter_WriteChar`.
* :c:func:`PyUnicodeWriter_WriteUTF8`.
+* :c:func:`PyUnicodeWriter_WriteUCS4`.
+* :c:func:`PyUnicodeWriter_WriteWideChar`.
* :c:func:`PyUnicodeWriter_WriteStr`.
* :c:func:`PyUnicodeWriter_WriteRepr`.
* :c:func:`PyUnicodeWriter_WriteSubstring`.
* :c:func:`PyUnicodeWriter_Format`.
+* :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
Patch by Victor Stinner.
}
+static PyObject*
+writer_write_ucs4(PyObject *self_raw, PyObject *args)
+{  /* Test wrapper: takes (str, size) and writes at most size code points of str through PyUnicodeWriter_WriteUCS4(); returns None, or NULL on error. */
+ WriterObject *self = (WriterObject *)self_raw;
+ if (writer_check(self) < 0) {  /* presumably rejects a writer that can no longer be used -- verify against writer_check() */
+ return NULL;
+ }
+
+ PyObject *str;
+ Py_ssize_t size;
+ if (!PyArg_ParseTuple(args, "Un", &str, &size)) {  /* U: str object, n: Py_ssize_t */
+ return NULL;
+ }
+ Py_ssize_t len = PyUnicode_GET_LENGTH(str);
+ size = Py_MIN(size, len);  /* clamp to the string length; a negative size passes through unchanged so the callee can reject it */
+
+ Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(str);  /* temporary UCS4 copy of str, released with PyMem_Free() below */
+ if (ucs4 == NULL) {
+ return NULL;
+ }
+
+ int res = PyUnicodeWriter_WriteUCS4(self->writer, ucs4, size);
+ PyMem_Free(ucs4);  /* freed before res is inspected, so the success and error paths both release the copy */
+ if (res < 0) {
+ return NULL;
+ }
+ Py_RETURN_NONE;
+}
+
+
static PyObject*
writer_write_str(PyObject *self_raw, PyObject *args)
{
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
+ {"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
{"write_repr", _PyCFunction_CAST(writer_write_repr), METH_VARARGS},
{"write_substring", _PyCFunction_CAST(writer_write_substring), METH_VARARGS},
if (!converted) {
return -1;
}
- PyObject *unicode = _PyUnicode_FromUCS4(converted, size);
- PyMem_Free(converted);
- int res = _PyUnicodeWriter_WriteStr(writer, unicode);
- Py_DECREF(unicode);
+ int res = PyUnicodeWriter_WriteUCS4(pub_writer, converted, size);
+ PyMem_Free(converted);
return res;
}
#endif
return res;
}
+
+int
+PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
+ Py_UCS4 *str,  /* NOTE(review): only read in this function; could be const-qualified together with the header declaration */
+ Py_ssize_t size)
+{  /* Append size UCS4 code points from str to the writer. Returns 0 on success, -1 on error. */
+ _PyUnicodeWriter *writer = (_PyUnicodeWriter*)pub_writer;  /* the public handle is used as the internal writer struct */
+
+ if (size < 0) {  /* negative sizes are rejected with ValueError */
+ PyErr_SetString(PyExc_ValueError,
+ "size must be positive");  /* NOTE(review): wording says positive, yet size == 0 is accepted just below */
+ return -1;
+ }
+
+ if (size == 0) {  /* nothing to write; the writer is left untouched */
+ return 0;
+ }
+
+ Py_UCS4 max_char = ucs4lib_find_max_char(str, str + size);  /* scanned over exactly the size code points being written */
+
+ if (_PyUnicodeWriter_Prepare(writer, size, max_char) < 0) {  /* request room for size characters up to max_char */
+ return -1;
+ }
+
+ int kind = writer->kind;  /* read after Prepare; presumably Prepare may change the writer kind -- TODO confirm */
+ void *data = (Py_UCS1*)writer->data + writer->pos * kind;  /* byte address of the write position; kind is used as the per-character byte width */
+ if (kind == PyUnicode_1BYTE_KIND) {  /* store each code point as Py_UCS1 */
+ _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS1,
+ str, str + size,
+ data);
+ }
+ else if (kind == PyUnicode_2BYTE_KIND) {  /* store each code point as Py_UCS2 */
+ _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS2,
+ str, str + size,
+ data);
+ }
+ else {  /* remaining kind, presumably PyUnicode_4BYTE_KIND: elements are already Py_UCS4, so a raw copy is used */
+ memcpy(data, str, size * sizeof(Py_UCS4));
+ }
+ writer->pos += size;  /* advance the write position past the new characters */
+
+ return 0;
+}
+
+
PyObject*
PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
{
PyUnicodeWriter_Create(Py_ssize_t length)
{
if (length < 0) {
- PyErr_SetString(PyExc_TypeError,
+ PyErr_SetString(PyExc_ValueError,
"length must be positive");
return NULL;
}