gh-119182: Rewrite PyUnicodeWriter tests in Python (#120845)

author Victor Stinner <vstinner@python.org>

Fri, 21 Jun 2024 18:15:06 +0000 (20:15 +0200)

committer GitHub <noreply@github.com>

Fri, 21 Jun 2024 18:15:06 +0000 (20:15 +0200)
author Victor Stinner <vstinner@python.org>
Fri, 21 Jun 2024 18:15:06 +0000 (20:15 +0200)
committer GitHub <noreply@github.com>
Fri, 21 Jun 2024 18:15:06 +0000 (20:15 +0200)
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py

index a69f817c515ba7d41e6861838d2ec9b60b00c823..36106b0730dd26a6ba3fb3efc81e7d6642c7afa3 100644 (file)
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -16,6 +16,10 @@ try:
      import _testinternalcapi
  except ImportError:
      _testinternalcapi = None
+try:
+    import ctypes
+except ImportError:
+    ctypes = None
  
  
  NULL = None
@@ -352,13 +356,13 @@ class CAPITest(unittest.TestCase):
          self.assertRaises(TypeError, fromobject, [])
          # CRASHES fromobject(NULL)
  
+    @unittest.skipIf(ctypes is None, 'need ctypes')
      def test_from_format(self):
          """Test PyUnicode_FromFormat()"""
          # Length modifiers "j" and "t" are not tested here because ctypes does
          # not expose types for intmax_t and ptrdiff_t.
          # _testlimitedcapi.test_string_from_format() has a wider coverage of all
          # formats.
-        import_helper.import_module('ctypes')
          from ctypes import (
              c_char_p,
              pythonapi, py_object, sizeof,
@@ -1676,5 +1680,149 @@ class CAPITest(unittest.TestCase):
                  self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
  
  
+class PyUnicodeWriterTest(unittest.TestCase):
+    # Exercise the PyUnicodeWriter C API through the _testcapi.PyUnicodeWriter
+    # wrapper type.  Each write_*() / decodeutf8stateful() method calls the
+    # matching PyUnicodeWriter_*() C function; finish() returns the string
+    # that was built.
+
+    def create_writer(self, size):
+        # 'size' is passed to PyUnicodeWriter_Create() by the wrapper type.
+        return _testcapi.PyUnicodeWriter(size)
+
+    def test_basic(self):
+        writer = self.create_writer(100)
+
+        # test PyUnicodeWriter_WriteUTF8()
+        writer.write_utf8(b'var', -1)
+
+        # test PyUnicodeWriter_WriteChar()
+        writer.write_char('=')
+
+        # test PyUnicodeWriter_WriteSubstring()
+        writer.write_substring("[long]", 1, 5)
+
+        # test PyUnicodeWriter_WriteStr()
+        writer.write_str(" value ")
+
+        # test PyUnicodeWriter_WriteRepr()
+        writer.write_repr("repr")
+
+        self.assertEqual(writer.finish(),
+                         "var=long value 'repr'")
+
+    def test_utf8(self):
+        # ASCII, 2-byte (U+00E9) and 3-byte (U+20AC) UTF-8 sequences
+        writer = self.create_writer(0)
+        writer.write_utf8(b"ascii", -1)
+        writer.write_char('-')
+        writer.write_utf8(b"latin1=\xC3\xA9", -1)
+        writer.write_char('-')
+        writer.write_utf8(b"euro=\xE2\x82\xAC", -1)
+        writer.write_char('.')
+        self.assertEqual(writer.finish(),
+                         "ascii-latin1=\xE9-euro=\u20AC.")
+
+    def test_invalid_utf8(self):
+        # 0xFF is never valid in UTF-8: the write must fail
+        writer = self.create_writer(0)
+        with self.assertRaises(UnicodeDecodeError):
+            writer.write_utf8(b"invalid=\xFF", -1)
+
+    def test_recover_utf8_error(self):
+        # test recovering from PyUnicodeWriter_WriteUTF8() error
+        writer = self.create_writer(0)
+        writer.write_utf8(b"value=", -1)
+
+        # write fails with an invalid string
+        with self.assertRaises(UnicodeDecodeError):
+            writer.write_utf8(b"invalid\xFF", -1)
+
+        # retry write with a valid string
+        writer.write_utf8(b"valid", -1)
+
+        # nothing from the failed write ends up in the result
+        self.assertEqual(writer.finish(),
+                         "value=valid")
+
+    def test_decode_utf8(self):
+        # test PyUnicodeWriter_DecodeUTF8Stateful()
+        writer = self.create_writer(0)
+        writer.decodeutf8stateful(b"ign\xFFore", -1, b"ignore")
+        writer.write_char('-')
+        writer.decodeutf8stateful(b"replace\xFF", -1, b"replace")
+        writer.write_char('-')
+
+        # incomplete trailing UTF-8 sequence
+        writer.decodeutf8stateful(b"incomplete\xC3", -1, b"replace")
+
+        self.assertEqual(writer.finish(),
+                         "ignore-replace\uFFFD-incomplete\uFFFD")
+
+    def test_decode_utf8_consumed(self):
+        # test PyUnicodeWriter_DecodeUTF8Stateful() with consumed
+        # (the trailing True asks the wrapper to pass a 'consumed' pointer
+        # and to return its value)
+        writer = self.create_writer(0)
+
+        # valid string
+        consumed = writer.decodeutf8stateful(b"text", -1, b"strict", True)
+        self.assertEqual(consumed, 4)
+        writer.write_char('-')
+
+        # non-ASCII
+        consumed = writer.decodeutf8stateful(b"\xC3\xA9-\xE2\x82\xAC", 6, b"strict", True)
+        self.assertEqual(consumed, 6)
+        writer.write_char('-')
+
+        # invalid UTF-8 (consumed is 0 on error)
+        with self.assertRaises(UnicodeDecodeError):
+            writer.decodeutf8stateful(b"invalid\xFF", -1, b"strict", True)
+
+        # ignore error handler
+        consumed = writer.decodeutf8stateful(b"more\xFF", -1, b"ignore", True)
+        self.assertEqual(consumed, 5)
+        writer.write_char('-')
+
+        # incomplete trailing UTF-8 sequence
+        consumed = writer.decodeutf8stateful(b"incomplete\xC3", -1, b"ignore", True)
+        self.assertEqual(consumed, 10)
+
+        self.assertEqual(writer.finish(), "text-\xE9-\u20AC-more-incomplete")
+
+    def test_widechar(self):
+        # test PyUnicodeWriter_WriteWideChar()
+        writer = self.create_writer(0)
+        writer.write_widechar("latin1=\xE9")
+        writer.write_widechar("-")
+        writer.write_widechar("euro=\u20AC")
+        writer.write_char('.')
+        self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.")
+
+
+# Tests for PyUnicodeWriter_Format(), called through ctypes.pythonapi.
+# The whole class is skipped when ctypes could not be imported.
+@unittest.skipIf(ctypes is None, 'need ctypes')
+class PyUnicodeWriterFormatTest(unittest.TestCase):
+    def create_writer(self, size):
+        return _testcapi.PyUnicodeWriter(size)
+
+    def writer_format(self, writer, *args):
+        # Call PyUnicodeWriter_Format(writer, *args) via ctypes.
+        # args[0] is the format string (bytes); any further items are passed
+        # as the variadic arguments.  Only the two fixed parameters (writer
+        # pointer and format string) are declared in argtypes.
+        # Raises ValueError if the C function returns a negative value.
+        from ctypes import c_char_p, pythonapi, c_int, c_void_p
+        _PyUnicodeWriter_Format = getattr(pythonapi, "PyUnicodeWriter_Format")
+        _PyUnicodeWriter_Format.argtypes = (c_void_p, c_char_p,)
+        _PyUnicodeWriter_Format.restype = c_int
+
+        # get_pointer() returns the address of the underlying PyUnicodeWriter
+        if _PyUnicodeWriter_Format(writer.get_pointer(), *args) < 0:
+            raise ValueError("PyUnicodeWriter_Format failed")
+
+    def test_format(self):
+        # format a bytes string with %s and a C int with %i
+        from ctypes import c_int
+        writer = self.create_writer(0)
+        self.writer_format(writer, b'%s %i', b'abc', c_int(123))
+        writer.write_char('.')
+        self.assertEqual(writer.finish(), 'abc 123.')
+
+    def test_recover_error(self):
+        # test recovering from PyUnicodeWriter_Format() error
+        writer = self.create_writer(0)
+        self.writer_format(writer, b"%s ", b"Hello")
+
+        # PyUnicodeWriter_Format() fails with an invalid format string
+        with self.assertRaises(ValueError):
+            self.writer_format(writer, b"%s\xff", b"World")
+
+        # Retry PyUnicodeWriter_Format() with a valid format string
+        self.writer_format(writer, b"%s.", b"World")
+
+        # the failed call must not have written anything
+        self.assertEqual(writer.finish(), 'Hello World.')
+
+
  if __name__ == "__main__":
      unittest.main()
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c

index da658b4129dffdd769b4971ef6216fe4e347aab9..c723e087baa308835208d94beed6d62f6d419d08 100644 (file)
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -221,368 +221,292 @@ unicode_copycharacters(PyObject *self, PyObject *args)
  }
  
  
+// --- PyUnicodeWriter type -------------------------------------------------
+
+typedef struct {
+    PyObject_HEAD
+    PyUnicodeWriter *writer;
+} WriterObject;
+
+
  static PyObject *
-test_unicodewriter(PyObject *self, PyObject *Py_UNUSED(args))
+writer_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
  {
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(100);
-    if (writer == NULL) {
+    WriterObject *self = (WriterObject *)type->tp_alloc(type, 0);
+    if (!self) {
          return NULL;
      }
+    self->writer = NULL;
+    return (PyObject*)self;
+}
  
-    // test PyUnicodeWriter_WriteUTF8()
-    if (PyUnicodeWriter_WriteUTF8(writer, "var", -1) < 0) {
-        goto error;
-    }
  
-    // test PyUnicodeWriter_WriteChar()
-    if (PyUnicodeWriter_WriteChar(writer, '=') < 0) {
-        goto error;
-    }
+static int
+writer_init(PyObject *self_raw, PyObject *args, PyObject *kwargs)
+{
+    WriterObject *self = (WriterObject *)self_raw;
  
-    // test PyUnicodeWriter_WriteSubstring()
-    PyObject *str = PyUnicode_FromString("[long]");
-    if (str == NULL) {
-        goto error;
-    }
-    int ret = PyUnicodeWriter_WriteSubstring(writer, str, 1, 5);
-    Py_CLEAR(str);
-    if (ret < 0) {
-        goto error;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "n", &size)) {
+        return -1;
      }
  
-    // test PyUnicodeWriter_WriteStr()
-    str = PyUnicode_FromString(" value ");
-    if (str == NULL) {
-        goto error;
-    }
-    ret = PyUnicodeWriter_WriteStr(writer, str);
-    Py_CLEAR(str);
-    if (ret < 0) {
-        goto error;
+    if (self->writer) {
+        PyUnicodeWriter_Discard(self->writer);
      }
  
-    // test PyUnicodeWriter_WriteRepr()
-    str = PyUnicode_FromString("repr");
-    if (str == NULL) {
-        goto error;
-    }
-    ret = PyUnicodeWriter_WriteRepr(writer, str);
-    Py_CLEAR(str);
-    if (ret < 0) {
-        goto error;
+    self->writer = PyUnicodeWriter_Create(size);
+    if (self->writer == NULL) {
+        return -1;
      }
+    return 0;
+}
  
-    PyObject *result = PyUnicodeWriter_Finish(writer);
-    if (result == NULL) {
-        return NULL;
+
+static void
+writer_dealloc(PyObject *self_raw)
+{
+    WriterObject *self = (WriterObject *)self_raw;
+    PyTypeObject *tp = Py_TYPE(self);
+    if (self->writer) {
+        PyUnicodeWriter_Discard(self->writer);
      }
-    assert(PyUnicode_EqualToUTF8(result, "var=long value 'repr'"));
-    Py_DECREF(result);
+    tp->tp_free(self);
+    Py_DECREF(tp);
+}
  
-    Py_RETURN_NONE;
  
-error:
-    PyUnicodeWriter_Discard(writer);
-    return NULL;
+// Guard used at the top of every writer method: self->writer is NULL before
+// writer_init() has run and again after writer_finish().
+// Return 0 if the writer is usable; otherwise set ValueError and return -1.
+static inline int
+writer_check(WriterObject *self)
+{
+    if (self->writer == NULL) {
+        PyErr_SetString(PyExc_ValueError, "operation on finished writer");
+        return -1;
+    }
+    return 0;
  }
  
  
-static PyObject *
-test_unicodewriter_utf8(PyObject *self, PyObject *Py_UNUSED(args))
+// PyUnicodeWriter.write_char(str): wrapper for PyUnicodeWriter_WriteChar().
+// 'str' must be a string of exactly one character; otherwise ValueError is
+// raised.  Return None on success, NULL with an exception set on error.
+static PyObject*
+writer_write_char(PyObject *self_raw, PyObject *args)
  {
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
-    if (writer == NULL) {
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
          return NULL;
      }
-    if (PyUnicodeWriter_WriteUTF8(writer, "ascii", -1) < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_WriteUTF8(writer, "latin1=\xC3\xA9", -1) < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_WriteUTF8(writer, "euro=\xE2\x82\xAC", -1) < 0) {
-        goto error;
+
+    PyObject *str;
+    if (!PyArg_ParseTuple(args, "U", &str)) {
+        return NULL;
      }
-    if (PyUnicodeWriter_WriteChar(writer, '.') < 0) {
-        goto error;
+    if (PyUnicode_GET_LENGTH(str) != 1) {
+        PyErr_SetString(PyExc_ValueError, "expect a single character");
+        // Bail out: reading index 0 of an empty string is out of bounds,
+        // and no C API call may be made with an exception already set.
+        return NULL;
      }
+    Py_UCS4 ch = PyUnicode_READ_CHAR(str, 0);
  
-    PyObject *result = PyUnicodeWriter_Finish(writer);
-    if (result == NULL) {
+    if (PyUnicodeWriter_WriteChar(self->writer, ch) < 0) {
          return NULL;
      }
-    assert(PyUnicode_EqualToUTF8(result,
-                                 "ascii-latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
-    Py_DECREF(result);
-
      Py_RETURN_NONE;
-
-error:
-    PyUnicodeWriter_Discard(writer);
-    return NULL;
  }
  
  
-static PyObject *
-test_unicodewriter_invalid_utf8(PyObject *self, PyObject *Py_UNUSED(args))
+static PyObject*
+writer_write_utf8(PyObject *self_raw, PyObject *args)
  {
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
-    if (writer == NULL) {
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
          return NULL;
      }
-    assert(PyUnicodeWriter_WriteUTF8(writer, "invalid=\xFF", -1) < 0);
-    PyUnicodeWriter_Discard(writer);
  
-    assert(PyErr_ExceptionMatches(PyExc_UnicodeDecodeError));
-    PyErr_Clear();
+    char *str;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "yn", &str, &size)) {
+        return NULL;
+    }
  
+    if (PyUnicodeWriter_WriteUTF8(self->writer, str, size) < 0) {
+        return NULL;
+    }
      Py_RETURN_NONE;
  }
  
  
-static PyObject *
-test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
+static PyObject*
+writer_write_widechar(PyObject *self_raw, PyObject *args)
  {
-    // test recovering from PyUnicodeWriter_WriteUTF8() error
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
-    if (writer == NULL) {
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
          return NULL;
      }
-    assert(PyUnicodeWriter_WriteUTF8(writer, "value=", -1) == 0);
-
-    // write fails with an invalid string
-    assert(PyUnicodeWriter_WriteUTF8(writer, "invalid\xFF", -1) < 0);
-    PyErr_Clear();
  
-    // retry write with a valid string
-    assert(PyUnicodeWriter_WriteUTF8(writer, "valid", -1) == 0);
+    PyObject *str;
+    if (!PyArg_ParseTuple(args, "U", &str)) {
+        return NULL;
+    }
  
-    PyObject *result = PyUnicodeWriter_Finish(writer);
-    if (result == NULL) {
+    Py_ssize_t size;
+    wchar_t *wstr = PyUnicode_AsWideCharString(str, &size);
+    if (wstr == NULL) {
          return NULL;
      }
-    assert(PyUnicode_EqualToUTF8(result, "value=valid"));
-    Py_DECREF(result);
  
+    int res = PyUnicodeWriter_WriteWideChar(self->writer, wstr, size);
+    PyMem_Free(wstr);
+    if (res < 0) {
+        return NULL;
+    }
      Py_RETURN_NONE;
  }
  
  
-static PyObject *
-test_unicodewriter_decode_utf8(PyObject *self, PyObject *Py_UNUSED(args))
+static PyObject*
+writer_write_str(PyObject *self_raw, PyObject *args)
  {
-    // test PyUnicodeWriter_DecodeUTF8Stateful()
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
-    if (writer == NULL) {
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
          return NULL;
      }
-    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "ign\xFFore", -1, "ignore", NULL) < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "replace\xFF", -1, "replace", NULL) < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
-        goto error;
-    }
  
-    // incomplete trailing UTF-8 sequence
-    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "incomplete\xC3", -1, "replace", NULL) < 0) {
-        goto error;
+    PyObject *obj;
+    if (!PyArg_ParseTuple(args, "O", &obj)) {
+        return NULL;
      }
  
-    PyObject *result = PyUnicodeWriter_Finish(writer);
-    if (result == NULL) {
+    if (PyUnicodeWriter_WriteStr(self->writer, obj) < 0) {
          return NULL;
      }
-    assert(PyUnicode_EqualToUTF8(result,
-                                 "ignore-replace\xef\xbf\xbd"
-                                 "-incomplete\xef\xbf\xbd"));
-    Py_DECREF(result);
-
      Py_RETURN_NONE;
-
-error:
-    PyUnicodeWriter_Discard(writer);
-    return NULL;
  }
  
  
-static PyObject *
-test_unicodewriter_decode_utf8_consumed(PyObject *self, PyObject *Py_UNUSED(args))
+static PyObject*
+writer_write_repr(PyObject *self_raw, PyObject *args)
  {
-    // test PyUnicodeWriter_DecodeUTF8Stateful()
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
-    if (writer == NULL) {
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
          return NULL;
      }
-    Py_ssize_t consumed;
  
-    // valid string
-    consumed = 12345;
-    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "text", -1, NULL, &consumed) < 0) {
-        goto error;
-    }
-    assert(consumed == 4);
-    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
-        goto error;
+    PyObject *obj;
+    if (!PyArg_ParseTuple(args, "O", &obj)) {
+        return NULL;
      }
  
-    // non-ASCII
-    consumed = 12345;
-    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "\xC3\xA9-\xE2\x82\xAC", 6, NULL, &consumed) < 0) {
-        goto error;
-    }
-    assert(consumed == 6);
-    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
-        goto error;
+    if (PyUnicodeWriter_WriteRepr(self->writer, obj) < 0) {
+        return NULL;
      }
+    Py_RETURN_NONE;
+}
  
-    // consumed is 0 if write fails
-    consumed = 12345;
-    assert(PyUnicodeWriter_DecodeUTF8Stateful(writer, "invalid\xFF", -1, NULL, &consumed) < 0);
-    PyErr_Clear();
-    assert(consumed == 0);
  
-    // ignore error handler
-    consumed = 12345;
-    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "more\xFF", -1, "ignore", &consumed) < 0) {
-        goto error;
-    }
-    assert(consumed == 5);
-    if (PyUnicodeWriter_WriteChar(writer, '-') < 0) {
-        goto error;
+static PyObject*
+writer_write_substring(PyObject *self_raw, PyObject *args)
+{
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
+        return NULL;
      }
  
-    // incomplete trailing UTF-8 sequence
-    consumed = 12345;
-    if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "incomplete\xC3", -1, "ignore", &consumed) < 0) {
-        goto error;
+    PyObject *str;
+    Py_ssize_t start, end;
+    if (!PyArg_ParseTuple(args, "Unn", &str, &start, &end)) {
+        return NULL;
      }
-    assert(consumed == 10);
  
-    PyObject *result = PyUnicodeWriter_Finish(writer);
-    if (result == NULL) {
+    if (PyUnicodeWriter_WriteSubstring(self->writer, str, start, end) < 0) {
          return NULL;
      }
-    assert(PyUnicode_EqualToUTF8(result,
-                                 "text-\xC3\xA9-\xE2\x82\xAC-"
-                                 "more-incomplete"));
-    Py_DECREF(result);
-
      Py_RETURN_NONE;
-
-error:
-    PyUnicodeWriter_Discard(writer);
-    return NULL;
  }
  
  
-static PyObject *
-test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args))
+static PyObject*
+writer_decodeutf8stateful(PyObject *self_raw, PyObject *args)
  {
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
-    if (writer == NULL) {
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
          return NULL;
      }
  
-    // test PyUnicodeWriter_Format()
-    if (PyUnicodeWriter_Format(writer, "%s %i", "Hello", 123) < 0) {
-        goto error;
-    }
-
-    // test PyUnicodeWriter_WriteChar()
-    if (PyUnicodeWriter_WriteChar(writer, '.') < 0) {
-        goto error;
+    const char *str;
+    Py_ssize_t len;
+    const char *errors;
+    int use_consumed = 0;
+    if (!PyArg_ParseTuple(args, "yny|i", &str, &len, &errors, &use_consumed)) {
+        return NULL;
      }
  
-    PyObject *result = PyUnicodeWriter_Finish(writer);
-    if (result == NULL) {
+    Py_ssize_t consumed = 12345;
+    Py_ssize_t *pconsumed = use_consumed ? &consumed : NULL;
+    if (PyUnicodeWriter_DecodeUTF8Stateful(self->writer, str, len,
+                                           errors, pconsumed) < 0) {
+        if (use_consumed) {
+            assert(consumed == 0);
+        }
          return NULL;
      }
-    assert(PyUnicode_EqualToUTF8(result, "Hello 123."));
-    Py_DECREF(result);
  
+    if (use_consumed) {
+        return PyLong_FromSsize_t(consumed);
+    }
      Py_RETURN_NONE;
-
-error:
-    PyUnicodeWriter_Discard(writer);
-    return NULL;
  }
  
  
-static PyObject *
-test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args))
+static PyObject*
+writer_get_pointer(PyObject *self_raw, PyObject *args)
  {
-    // test recovering from PyUnicodeWriter_Format() error
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
-    if (writer == NULL) {
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
          return NULL;
      }
  
-    assert(PyUnicodeWriter_Format(writer, "%s ", "Hello") == 0);
-
-    // PyUnicodeWriter_Format() fails with an invalid format string
-    assert(PyUnicodeWriter_Format(writer, "%s\xff", "World") < 0);
-    PyErr_Clear();
+    return PyLong_FromVoidPtr(self->writer);
+}
  
-    // Retry PyUnicodeWriter_Format() with a valid format string
-    assert(PyUnicodeWriter_Format(writer, "%s.", "World") == 0);
  
-    PyObject *result = PyUnicodeWriter_Finish(writer);
-    if (result == NULL) {
+static PyObject*
+writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args))
+{
+    WriterObject *self = (WriterObject *)self_raw;
+    if (writer_check(self) < 0) {
          return NULL;
      }
-    assert(PyUnicode_EqualToUTF8(result, "Hello World."));
-    Py_DECREF(result);
  
-    Py_RETURN_NONE;
+    PyObject *str = PyUnicodeWriter_Finish(self->writer);
+    self->writer = NULL;
+    return str;
  }
  
  
-static PyObject *
-test_unicodewriter_widechar(PyObject *self, PyObject *Py_UNUSED(args))
-{
-    PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
-    if (writer == NULL) {
-        return NULL;
-    }
-    if (PyUnicodeWriter_WriteWideChar(writer, L"latin1=\xE9 IGNORED", 8) < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_WriteWideChar(writer, L"-", 1) < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_WriteWideChar(writer, L"euro=\u20AC", -1) < 0) {
-        goto error;
-    }
-    if (PyUnicodeWriter_WriteChar(writer, '.') < 0) {
-        goto error;
-    }
-
-    PyObject *result = PyUnicodeWriter_Finish(writer);
-    if (result == NULL) {
-        return NULL;
-    }
-    assert(PyUnicode_EqualToUTF8(result,
-                                 "latin1=\xC3\xA9-euro=\xE2\x82\xAC."));
-    Py_DECREF(result);
+static PyMethodDef writer_methods[] = {
+    {"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
+    {"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
+    {"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
+    {"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
+    {"write_repr", _PyCFunction_CAST(writer_write_repr), METH_VARARGS},
+    {"write_substring", _PyCFunction_CAST(writer_write_substring), METH_VARARGS},
+    {"decodeutf8stateful", _PyCFunction_CAST(writer_decodeutf8stateful), METH_VARARGS},
+    {"get_pointer", _PyCFunction_CAST(writer_get_pointer), METH_VARARGS},
+    {"finish", _PyCFunction_CAST(writer_finish), METH_NOARGS},
+    {NULL,              NULL}           /* sentinel */
+};
  
-    Py_RETURN_NONE;
+static PyType_Slot Writer_Type_slots[] = {
+    {Py_tp_new, writer_new},
+    {Py_tp_init, writer_init},
+    {Py_tp_dealloc, writer_dealloc},
+    {Py_tp_methods, writer_methods},
+    {0, 0},  /* sentinel */
+};
  
-error:
-    PyUnicodeWriter_Discard(writer);
-    return NULL;
-}
+static PyType_Spec Writer_spec = {
+    .name = "_testcapi.PyUnicodeWriter",
+    .basicsize = sizeof(WriterObject),
+    .flags = Py_TPFLAGS_DEFAULT,
+    .slots = Writer_Type_slots,
+};
  
  
  static PyMethodDef TestMethods[] = {
@@ -593,15 +517,6 @@ static PyMethodDef TestMethods[] = {
      {"unicode_asucs4copy",       unicode_asucs4copy,             METH_VARARGS},
      {"unicode_asutf8",           unicode_asutf8,                 METH_VARARGS},
      {"unicode_copycharacters",   unicode_copycharacters,         METH_VARARGS},
-    {"test_unicodewriter",       test_unicodewriter,             METH_NOARGS},
-    {"test_unicodewriter_utf8",  test_unicodewriter_utf8,        METH_NOARGS},
-    {"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS},
-    {"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS},
-    {"test_unicodewriter_decode_utf8", test_unicodewriter_decode_utf8, METH_NOARGS},
-    {"test_unicodewriter_decode_utf8_consumed", test_unicodewriter_decode_utf8_consumed, METH_NOARGS},
-    {"test_unicodewriter_format", test_unicodewriter_format,     METH_NOARGS},
-    {"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS},
-    {"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS},
      {NULL},
  };
  
@@ -610,5 +525,16 @@ _PyTestCapi_Init_Unicode(PyObject *m) {
      if (PyModule_AddFunctions(m, TestMethods) < 0) {
          return -1;
      }
+
+    PyTypeObject *writer_type = (PyTypeObject *)PyType_FromSpec(&Writer_spec);
+    if (writer_type == NULL) {
+        return -1;
+    }
+    if (PyModule_AddType(m, writer_type) < 0) {
+        Py_DECREF(writer_type);
+        return -1;
+    }
+    Py_DECREF(writer_type);
+
      return 0;
  }
author	Victor Stinner <vstinner@python.org>
	Fri, 21 Jun 2024 18:15:06 +0000 (20:15 +0200)
committer	GitHub <noreply@github.com>
	Fri, 21 Jun 2024 18:15:06 +0000 (20:15 +0200)
Lib/test/test_capi/test_unicode.py		patch \| blob \| blame \| history
Modules/_testcapi/unicode.c		patch \| blob \| blame \| history