return Py_BuildValue("(Nn)", result, utf8_len);
}
-static PyObject *
-unicode_getutf8buffer(PyObject *self, PyObject *args)
-{
- PyObject *unicode;
- const char *errors = NULL;
- if(!PyArg_ParseTuple(args, "O|s", &unicode, &errors)) {
- return NULL;
- }
-
- Py_buffer buffer;
- if (_PyUnicode_GetUTF8Buffer(unicode, errors, &buffer) < 0) {
- return NULL;
- }
-
- assert(buffer.obj != NULL);
- assert(buffer.obj == unicode || PyBytes_CheckExact(buffer.obj));
-
- PyObject *result = PyBytes_FromStringAndSize(buffer.buf, buffer.len);
- PyBuffer_Release(&buffer);
- return result;
-}
-
-static PyObject *
-unicode_test_getutf8buffer(PyObject *self, PyObject *Py_UNUSED(ignored))
-{
- Py_buffer buf;
-
- // Test 1: ASCII string
- PyObject *str = PyUnicode_FromString("hello");
- if (str == NULL) {
- return NULL;
- }
- Py_ssize_t refcnt = Py_REFCNT(str);
-
- // _PyUnicode_GetUTF8Buffer() must not fail for ASCII string.
- int ret = _PyUnicode_GetUTF8Buffer(str, NULL, &buf);
- assert(ret == 0);
-
- if (buf.obj != str) {
- PyErr_Format(TestError,
- "buf.obj must be equal to str. (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (buf.len != PyUnicode_GET_LENGTH(str)) {
- PyErr_Format(TestError,
- "buf.len must be equal to len(str). (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
- assert(((const char*)buf.buf)[5] == '\0');
-
- if ((Py_UCS1*)buf.buf != PyUnicode_1BYTE_DATA(str)) {
- PyErr_Format(TestError,
- "buf.buf must be equal to PyUnicode_1BYTE_DATA(str). (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (refcnt + 1 != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
- refcnt + 1, Py_REFCNT(str),
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- PyBuffer_Release(&buf);
-
- if (refcnt != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
- refcnt, Py_REFCNT(str),
- __FILE__, __LINE__);
- Py_DECREF(str);
- return NULL;
- }
-
- Py_DECREF(str);
-
- // Test 2: non-ASCII string
-
- // "hello" in Japanese. len(str)==5, len(str.encode()) == 15.
- str = PyUnicode_FromString("\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf");
- if (str == NULL) {
- return NULL;
- }
- refcnt = Py_REFCNT(str);
- assert(PyUnicode_GET_LENGTH(str) == 5);
-
- if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
- Py_DECREF(str);
- if (!PyErr_Occurred()) {
- PyErr_Format(TestError,
- "_PyUnicode_GetUTF8Buffer() returned nonzero "
- "without exception set. (%s:%d)",
- __FILE__, __LINE__);
- }
- return NULL;
- }
-
- if (!PyBytes_CheckExact(buf.obj)) {
- PyErr_Format(TestError,
- "buf.obj must be a bytes object, got %R (%s:%d)",
- buf.obj, __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (buf.len != 15) {
- PyErr_Format(TestError,
- "Expected buf.len == 15, actual %zd (%s:%d)",
- buf.len, __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
- assert(((const char*)buf.buf)[15] == '\0');
-
- if (refcnt != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str) must not be changed. (%s:%d)",
- __FILE__, __LINE__);
- // Do not DECREF here because refcnt is broken.
- return NULL;
- }
-
- PyBuffer_Release(&buf);
-
- // Test 3: There is a UTF-8 cache
- // Reuse str of the previoss test.
-
- const char *cache = PyUnicode_AsUTF8(str);
- if (cache == NULL) {
- return NULL;
- }
-
- if (_PyUnicode_GetUTF8Buffer(str, NULL, &buf) < 0) {
- Py_DECREF(str);
- if (!PyErr_Occurred()) {
- PyErr_Format(TestError,
- "_PyUnicode_GetUTF8Buffer() returned nonzero "
- "without exception set. (%s:%d)",
- __FILE__, __LINE__);
- }
- return NULL;
- }
-
- if (buf.obj != str) {
- PyErr_Format(TestError,
- "buf.obj must be equal to str. (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (buf.buf != cache) {
- PyErr_Format(TestError,
- "buf.buf must be equal to the UTF-8 cache (%s:%d)",
- __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
-
- if (buf.len != 15) {
- PyErr_Format(TestError,
- "Expected buf.len == 15, actual %zd (%s:%d)",
- buf.len, __FILE__, __LINE__);
- PyBuffer_Release(&buf);
- Py_DECREF(str);
- return NULL;
- }
- assert(((const char*)buf.buf)[15] == '\0');
-
- if (refcnt + 1 != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
- refcnt + 1, Py_REFCNT(str),
- __FILE__, __LINE__);
- // Do not DECREF here because refcnt is broken.
- return NULL;
- }
-
- PyBuffer_Release(&buf);
-
- if (refcnt != Py_REFCNT(str)) {
- PyErr_Format(TestError,
- "Py_REFCNT(str); expected %zd, got %zd. (%s:%d)",
- refcnt, Py_REFCNT(str),
- __FILE__, __LINE__);
- // Do not DECREF here because refcnt is broken.
- return NULL;
- }
-
- Py_DECREF(str);
- Py_RETURN_NONE;
-}
-
static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
- {"unicode_getutf8buffer", unicode_getutf8buffer, METH_VARARGS},
- {"unicode_test_getutf8buffer", unicode_test_getutf8buffer, METH_NOARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},