From e9c538dd54d1e270bcb33e10f919cc9e8e7cb934 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 25 Sep 2025 11:42:16 +0200 Subject: [PATCH] gh-139156: Optimize _PyUnicode_EncodeCharmap() (#139306) Specialize _PyUnicode_EncodeCharmap() for EncodingMapType which is used by Python codecs such as iso8859_15. --- Objects/unicodeobject.c | 75 +++++++++++++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5f6384afd1b2..11ba147a7446 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6435,6 +6435,8 @@ _PyUnicode_EncodeUTF16(PyObject *str, #endif if (kind == PyUnicode_1BYTE_KIND) { + // gh-139156: Don't use PyBytesWriter API here since it has an overhead + // on short strings PyObject *v = PyBytes_FromStringAndSize(NULL, nsize * 2); if (v == NULL) { return NULL; @@ -8852,11 +8854,15 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping, if (Py_IS_TYPE(mapping, &EncodingMapType)) { int res = encoding_map_lookup(c, mapping); Py_ssize_t requiredsize = *outpos+1; - if (res == -1) + if (res == -1) { return enc_FAILED; - if (outsize adjust input position */ + ++inpos; + continue; + +enc_FAILED: if (charmap_encoding_error(unicode, &inpos, mapping, &exc, &error_handler, &error_handler_obj, errors, writer, &respos)) { goto onError; } + outstart = _PyBytesWriter_GetData(writer); + outsize = _PyBytesWriter_GetSize(writer); } - else { - /* done with this character => adjust input position */ - ++inpos; + } + else { + while (inpos adjust input position */ + ++inpos; + } } } -- 2.47.3