From: Victor Stinner
Date: Thu, 25 Sep 2025 09:42:16 +0000 (+0200)
Subject: gh-139156: Optimize _PyUnicode_EncodeCharmap() (#139306)
X-Git-Tag: v3.15.0a1~205
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e9c538dd54d1e270bcb33e10f919cc9e8e7cb934;p=thirdparty%2FPython%2Fcpython.git

gh-139156: Optimize _PyUnicode_EncodeCharmap() (#139306)

Specialize _PyUnicode_EncodeCharmap() for EncodingMapType which is used by Python codecs such as iso8859_15.
---

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5f6384afd1b2..11ba147a7446 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6435,6 +6435,8 @@ _PyUnicode_EncodeUTF16(PyObject *str,
 #endif

     if (kind == PyUnicode_1BYTE_KIND) {
+        // gh-139156: Don't use PyBytesWriter API here since it has an overhead
+        // on short strings
         PyObject *v = PyBytes_FromStringAndSize(NULL, nsize * 2);
         if (v == NULL) {
             return NULL;
@@ -8852,11 +8854,15 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
     if (Py_IS_TYPE(mapping, &EncodingMapType)) {
         int res = encoding_map_lookup(c, mapping);
         Py_ssize_t requiredsize = *outpos+1;
-        if (res == -1)
+        if (res == -1) {
             return enc_FAILED;
-        if (outsize [... text lost in extraction ...] adjust input position */
+            ++inpos;
+            continue;
+
+enc_FAILED:
             if (charmap_encoding_error(unicode, &inpos, mapping, &exc, &error_handler, &error_handler_obj, errors, writer, &respos)) {
                 goto onError;
             }
+            outstart = _PyBytesWriter_GetData(writer);
+            outsize = _PyBytesWriter_GetSize(writer);
         }
-        else {
-            /* done with this character => adjust input position */
-            ++inpos;
+    }
+    else {
+        while (inpos [... text lost in extraction ...] adjust input position */
+                ++inpos;
+            }
         }
     }