/* UTF-8 encoder specialized for a Unicode kind to avoid the slow
PyUnicode_READ() macro. Delete some parts of the code depending on the kind:
UCS-1 strings don't need to handle surrogates for example.

Review summary of this change: the encoder used to fill a caller-supplied
_PyBytesWriter and return the write pointer. It now creates the
PyBytesWriter itself, returns it, and stores the position of the next free
output byte in *end. On every failure path visible here it sets *end to NULL
and returns NULL.

NOTE(review): hunk boundaries are not marked in this listing, so lines that
look adjacent (for example the start of the for loop and the first "case"
label) are not necessarily adjacent in the file. */
-Py_LOCAL_INLINE(char *)
-STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
- PyObject *unicode,
+Py_LOCAL_INLINE(PyBytesWriter*)
+STRINGLIB(utf8_encoder)(PyObject *unicode,
const STRINGLIB_CHAR *data,
Py_ssize_t size,
_Py_error_handler error_handler,
- const char *errors)
+ const char *errors,
+ char **end)
{
- Py_ssize_t i; /* index into data of next input character */
- char *p; /* next free byte in output buffer */
#if STRINGLIB_SIZEOF_CHAR > 1
PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
/* Reject sizes for which size * max_char_size (used for the initial
allocation below) would not fit in Py_ssize_t. */
if (size > PY_SSIZE_T_MAX / max_char_size) {
/* integer overflow */
PyErr_NoMemory();
+ *end = NULL;
return NULL;
}
/* The writer is now created here, sized from size * max_char_size,
instead of being initialised in caller-provided storage. */
- _PyBytesWriter_Init(writer);
- p = _PyBytesWriter_Alloc(writer, size * max_char_size);
- if (p == NULL)
+ PyBytesWriter *writer = PyBytesWriter_Create(size * max_char_size);
+ if (writer == NULL) {
+ *end = NULL;
return NULL;
+ }
+ /* next free byte in output buffer */
+ char *p = PyBytesWriter_GetData(writer);
+ Py_ssize_t i; /* index into data of next input character */
for (i = 0; i < size;) {
Py_UCS4 ch = data[i++];
case _Py_ERROR_BACKSLASHREPLACE:
/* subtract preallocated bytes */
/* NOTE(review): the new code adjusts writer->size where the old code
adjusted writer->min_size -- confirm both fields carry the same meaning.
The direct field accesses (size, overallocate) also assume the
PyBytesWriter struct layout is visible at this point -- confirm. */
- writer->min_size -= max_char_size * (endpos - startpos);
+ writer->size -= max_char_size * (endpos - startpos);
p = backslashreplace(writer, p,
unicode, startpos, endpos);
if (p == NULL)
case _Py_ERROR_XMLCHARREFREPLACE:
/* subtract preallocated bytes */
- writer->min_size -= max_char_size * (endpos - startpos);
+ writer->size -= max_char_size * (endpos - startpos);
p = xmlcharrefreplace(writer, p,
unicode, startpos, endpos);
if (p == NULL)
/* The handler moved the position backwards: the characters between
newpos and startpos will be encoded again, so more room is requested. */
if (newpos < startpos) {
writer->overallocate = 1;
- p = _PyBytesWriter_Prepare(writer, p,
- max_char_size * (startpos - newpos));
- if (p == NULL)
+ p = PyBytesWriter_GrowAndUpdatePointer(writer,
+ max_char_size * (startpos - newpos),
+ p);
+ if (p == NULL) {
goto error;
+ }
}
else {
/* subtract preallocated bytes */
- writer->min_size -= max_char_size * (newpos - startpos);
+ writer->size -= max_char_size * (newpos - startpos);
/* Only overallocate the buffer if it's not the last write */
writer->overallocate = (newpos < size);
}
/* The replacement bytes are now selected first (rep_str/rep_len); one
grow-and-memcpy further down replaces the two former
_PyBytesWriter_WriteBytes() calls. */
+ char *rep_str;
+ Py_ssize_t rep_len;
if (PyBytes_Check(rep)) {
- p = _PyBytesWriter_WriteBytes(writer, p,
- PyBytes_AS_STRING(rep),
- PyBytes_GET_SIZE(rep));
+ rep_str = PyBytes_AS_STRING(rep);
+ rep_len = PyBytes_GET_SIZE(rep);
}
else {
/* rep is unicode */
goto error;
}
- p = _PyBytesWriter_WriteBytes(writer, p,
- PyUnicode_DATA(rep),
- PyUnicode_GET_LENGTH(rep));
+ rep_str = PyUnicode_DATA(rep);
+ rep_len = PyUnicode_GET_LENGTH(rep);
}
- if (p == NULL)
/* Make room for rep_len more bytes, then append the replacement and
advance the write pointer past it. */
+ p = PyBytesWriter_GrowAndUpdatePointer(writer, rep_len, p);
+ if (p == NULL) {
goto error;
+ }
+ memcpy(p, rep_str, rep_len);
+ p += rep_len;
Py_CLEAR(rep);
i = newpos;
Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
#endif
/* Success: report the end of the encoded data through *end and return
the writer. */
- return p;
+ *end = p;
+ return writer;
#if STRINGLIB_SIZEOF_CHAR > 1
error:
/* Failure after the writer was created: discard it here, so a NULL
return never leaves a writer behind. */
+ PyBytesWriter_Discard(writer);
Py_XDECREF(rep);
Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
+ *end = NULL;
return NULL;
#endif
}
/* Implementation of the "backslashreplace" error handler for 8-bit encodings:
ASCII, Latin1, UTF-8, etc.

In the lines shown, only the writer type and the call that makes room for
the replacement change; NULL is still returned when that call fails. */
static char*
-backslashreplace(_PyBytesWriter *writer, char *str,
+backslashreplace(PyBytesWriter *writer, char *str,
PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend)
{
Py_ssize_t size, i;
size += incr;
}
/* size was accumulated above (that loop is only partly visible here).
Request that many extra bytes; str is reassigned from the call's result --
presumably so it stays valid if the buffer moves, TODO confirm against the
PyBytesWriter_GrowAndUpdatePointer() documentation. */
- str = _PyBytesWriter_Prepare(writer, str, size);
- if (str == NULL)
+ str = PyBytesWriter_GrowAndUpdatePointer(writer, size, str);
+ if (str == NULL) {
return NULL;
+ }
/* generate replacement */
for (i = collstart; i < collend; ++i) {
/* Implementation of the "xmlcharrefreplace" error handler for 8-bit encodings:
ASCII, Latin1, UTF-8, etc.

In the lines shown, only the writer type and the call that makes room for
the replacement change; NULL is still returned when that call fails. */
static char*
-xmlcharrefreplace(_PyBytesWriter *writer, char *str,
+xmlcharrefreplace(PyBytesWriter *writer, char *str,
PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend)
{
Py_ssize_t size, i;
size += incr;
}
/* size was accumulated above (that loop is only partly visible here).
Request that many extra bytes; str is reassigned from the call's result --
presumably so it stays valid if the buffer moves, TODO confirm against the
PyBytesWriter_GrowAndUpdatePointer() documentation. */
- str = _PyBytesWriter_Prepare(writer, str, size);
- if (str == NULL)
+ str = PyBytesWriter_GrowAndUpdatePointer(writer, size, str);
+ if (str == NULL) {
return NULL;
+ }
/* generate replacement */
for (i = collstart; i < collend; ++i) {
const void *data = PyUnicode_DATA(unicode);
Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
- _PyBytesWriter writer;
+ PyBytesWriter *writer;
char *end;
+ /* Each kind-specific encoder returns the writer it created (NULL on
+ failure) and stores the end of the encoded data in end. */
switch (kind) {
case PyUnicode_1BYTE_KIND:
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
assert(!PyUnicode_IS_ASCII(unicode));
- end = ucs1lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+ writer = ucs1lib_utf8_encoder(unicode, data, size,
+ error_handler, errors, &end);
break;
case PyUnicode_2BYTE_KIND:
- end = ucs2lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+ writer = ucs2lib_utf8_encoder(unicode, data, size,
+ error_handler, errors, &end);
break;
case PyUnicode_4BYTE_KIND:
- end = ucs4lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+ writer = ucs4lib_utf8_encoder(unicode, data, size,
+ error_handler, errors, &end);
break;
}
- if (end == NULL) {
- _PyBytesWriter_Dealloc(&writer);
+ if (writer == NULL) {
+ /* No writer exists on this path (the encoder discards its own writer
+ before returning NULL), so there is nothing to discard here. */
return NULL;
}
+ /* Success: turn the writer into the result, truncated at end. */
- return _PyBytesWriter_Finish(&writer, end);
+ return PyBytesWriter_FinishWithPointer(writer, end);
}
/* Review summary of this change (the rest of the signature is outside the
lines shown): the encoder result is now a PyBytesWriter pointer instead of a
local _PyBytesWriter struct. The writer serves only as a temporary buffer
here: its data start is read with PyBytesWriter_GetData(), the encoded
length is end - start, and the writer is discarded on both the
allocation-failure path and the success path. Returns -1 on failure and 0
on success. */
static int
const void *data = PyUnicode_DATA(unicode);
Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
- _PyBytesWriter writer;
+ PyBytesWriter *writer;
char *end;
switch (kind) {
default:
Py_UNREACHABLE();
case PyUnicode_1BYTE_KIND:
- end = ucs1lib_utf8_encoder(&writer, unicode, data, size,
- _Py_ERROR_STRICT, NULL);
+ writer = ucs1lib_utf8_encoder(unicode, data, size,
+ _Py_ERROR_STRICT, NULL, &end);
break;
case PyUnicode_2BYTE_KIND:
- end = ucs2lib_utf8_encoder(&writer, unicode, data, size,
- _Py_ERROR_STRICT, NULL);
+ writer = ucs2lib_utf8_encoder(unicode, data, size,
+ _Py_ERROR_STRICT, NULL, &end);
break;
case PyUnicode_4BYTE_KIND:
- end = ucs4lib_utf8_encoder(&writer, unicode, data, size,
- _Py_ERROR_STRICT, NULL);
+ writer = ucs4lib_utf8_encoder(unicode, data, size,
+ _Py_ERROR_STRICT, NULL, &end);
break;
}
/* A NULL writer signals encoder failure; there is no writer to release
in that case, so the new code simply returns. */
- if (end == NULL) {
- _PyBytesWriter_Dealloc(&writer);
+ if (writer == NULL) {
return -1;
}
- const char *start = writer.use_small_buffer ? writer.small_buffer :
- PyBytes_AS_STRING(writer.buffer);
+ const char *start = PyBytesWriter_GetData(writer);
Py_ssize_t len = end - start;
/* One extra byte for the terminating NUL written below. */
char *cache = PyMem_Malloc(len + 1);
if (cache == NULL) {
- _PyBytesWriter_Dealloc(&writer);
+ PyBytesWriter_Discard(writer);
PyErr_NoMemory();
return -1;
}
/* NOTE(review): the copy of the encoded bytes from start into cache is not
visible in this listing -- presumably it sits in context omitted between
hunks; confirm it is still present. */
cache[len] = '\0';
PyUnicode_SET_UTF8_LENGTH(unicode, len);
PyUnicode_SET_UTF8(unicode, cache);
/* cache is now attached to the string object; the writer is no longer
needed. */
- _PyBytesWriter_Dealloc(&writer);
+ PyBytesWriter_Discard(writer);
return 0;
}
/* Review summary of this change (the function header is outside the lines
shown): the local _PyBytesWriter struct becomes a PyBytesWriter obtained
from PyBytesWriter_Create(size), and str is initialised from
PyBytesWriter_GetData(). The result is produced with
PyBytesWriter_FinishWithPointer(writer, str); the onError path discards the
writer and returns NULL. */
Py_ssize_t pos=0, size;
int kind;
const void *data;
- /* pointer into the output */
- char *str;
/* limit == 256 selects the "latin-1" naming, any other value "ascii". */
const char *encoding = (limit == 256) ? "latin-1" : "ascii";
const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
PyObject *rep = NULL;
- /* output object */
- _PyBytesWriter writer;
size = PyUnicode_GET_LENGTH(unicode);
kind = PyUnicode_KIND(unicode);
/* Empty input returns before any writer is created. */
if (size == 0)
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
- _PyBytesWriter_Init(&writer);
- str = _PyBytesWriter_Alloc(&writer, size);
- if (str == NULL)
+ /* output object */
+ PyBytesWriter *writer = PyBytesWriter_Create(size);
+ if (writer == NULL) {
return NULL;
+ }
+ /* pointer into the output */
+ char *str = PyBytesWriter_GetData(writer);
while (pos < size) {
Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
++collend;
/* Only overallocate the buffer if it's not the last write */
- writer.overallocate = (collend < size);
+ writer->overallocate = (collend < size);
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
if (error_handler == _Py_ERROR_UNKNOWN)
case _Py_ERROR_BACKSLASHREPLACE:
/* subtract preallocated bytes */
/* NOTE(review): the new code adjusts writer->size where the old code
adjusted writer.min_size -- confirm both fields carry the same meaning. */
- writer.min_size -= (collend - collstart);
- str = backslashreplace(&writer, str,
+ writer->size -= (collend - collstart);
+ str = backslashreplace(writer, str,
unicode, collstart, collend);
if (str == NULL)
goto onError;
case _Py_ERROR_XMLCHARREFREPLACE:
/* subtract preallocated bytes */
- writer.min_size -= (collend - collstart);
- str = xmlcharrefreplace(&writer, str,
+ writer->size -= (collend - collstart);
+ str = xmlcharrefreplace(writer, str,
unicode, collstart, collend);
if (str == NULL)
goto onError;
goto onError;
/* The handler moved the position backwards: the characters between
newpos and collstart will be encoded again, so more room is requested. */
if (newpos < collstart) {
- writer.overallocate = 1;
- str = _PyBytesWriter_Prepare(&writer, str,
- collstart - newpos);
- if (str == NULL)
+ writer->overallocate = 1;
+ str = PyBytesWriter_GrowAndUpdatePointer(writer,
+ collstart - newpos,
+ str);
+ if (str == NULL) {
goto onError;
+ }
}
else {
/* subtract preallocated bytes */
- writer.min_size -= newpos - collstart;
+ writer->size -= newpos - collstart;
/* Only overallocate the buffer if it's not the last write */
- writer.overallocate = (newpos < size);
+ writer->overallocate = (newpos < size);
}
/* The replacement bytes are now selected first (rep_str/rep_len); one
grow-and-memcpy further down replaces the two former
_PyBytesWriter_WriteBytes() calls. */
+ char *rep_str;
+ Py_ssize_t rep_len;
if (PyBytes_Check(rep)) {
/* Directly copy bytes result to output. */
- str = _PyBytesWriter_WriteBytes(&writer, str,
- PyBytes_AS_STRING(rep),
- PyBytes_GET_SIZE(rep));
+ rep_str = PyBytes_AS_STRING(rep);
+ rep_len = PyBytes_GET_SIZE(rep);
}
else {
assert(PyUnicode_Check(rep));
goto onError;
}
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
- str = _PyBytesWriter_WriteBytes(&writer, str,
- PyUnicode_DATA(rep),
- PyUnicode_GET_LENGTH(rep));
+ rep_str = PyUnicode_DATA(rep);
+ rep_len = PyUnicode_GET_LENGTH(rep);
}
- if (str == NULL)
+
/* Make room for rep_len more bytes, then append the replacement and
advance the write pointer past it. */
+ str = PyBytesWriter_GrowAndUpdatePointer(writer, rep_len, str);
+ if (str == NULL) {
goto onError;
+ }
+ memcpy(str, rep_str, rep_len);
+ str += rep_len;
pos = newpos;
Py_CLEAR(rep);
/* If overallocation was disabled, ensure that it was the last
write. Otherwise, we missed an optimization */
- assert(writer.overallocate || pos == size);
+ assert(writer->overallocate || pos == size);
}
}
Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
/* Success: build the result from the writer, ending at str. */
- return _PyBytesWriter_Finish(&writer, str);
+ return PyBytesWriter_FinishWithPointer(writer, str);
onError:
/* Failure: drop the pending replacement, the writer, and the cached
handler/exception objects. */
Py_XDECREF(rep);
- _PyBytesWriter_Dealloc(&writer);
+ PyBytesWriter_Discard(writer);
Py_XDECREF(error_handler_obj);
Py_XDECREF(exc);
return NULL;