From 8d83b7df3ff1b65599b7f5c5acc8bfe6074723b2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 24 Sep 2025 17:57:29 +0200 Subject: [PATCH] gh-139156: Optimize the UTF-7 encoder (#139253) Remove base64SetO and base64WhiteSpace parameters. --- Include/internal/pycore_unicodeobject.h | 2 -- Modules/_codecsmodule.c | 2 +- Objects/unicodeobject.c | 15 +++++---------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 8dfcaedd5ef2..c85c01da89a2 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -92,8 +92,6 @@ extern int _PyUnicodeWriter_FormatV( extern PyObject* _PyUnicode_EncodeUTF7( PyObject *unicode, /* Unicode object */ - int base64SetO, /* Encode RFC2152 Set O characters in base64 */ - int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ const char *errors); /* error handling */ /* --- UTF-8 Codecs ------------------------------------------------------- */ diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 33e262f2ba1e..bdffeced7da5 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -671,7 +671,7 @@ _codecs_utf_7_encode_impl(PyObject *module, PyObject *str, const char *errors) /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/ { - return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors), + return codec_tuple(_PyUnicode_EncodeUTF7(str, errors), PyUnicode_GET_LENGTH(str)); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9c00e22ea24b..5f6384afd1b2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4670,15 +4670,12 @@ char utf7_category[128] = { /* ENCODE_DIRECT: this character should be encoded as itself. The * answer depends on whether we are encoding set O as itself, and also - * on whether we are encoding whitespace as itself. RFC2152 makes it + * on whether we are encoding whitespace as itself. RFC 2152 makes it * clear that the answers to these questions vary between * applications, so this code needs to be flexible. */ -#define ENCODE_DIRECT(c, directO, directWS) \ - ((c) < 128 && (c) > 0 && \ - ((utf7_category[(c)] == 0) || \ - (directWS && (utf7_category[(c)] == 2)) || \ - (directO && (utf7_category[(c)] == 1)))) +#define ENCODE_DIRECT(c) \ + ((c) < 128 && (c) > 0 && ((utf7_category[(c)] != 3))) PyObject * PyUnicode_DecodeUTF7(const char *s, @@ -4895,8 +4892,6 @@ utf7Error: PyObject * _PyUnicode_EncodeUTF7(PyObject *str, - int base64SetO, - int base64WhiteSpace, const char *errors) { Py_ssize_t len = PyUnicode_GET_LENGTH(str); @@ -4923,7 +4918,7 @@ _PyUnicode_EncodeUTF7(PyObject *str, Py_UCS4 ch = PyUnicode_READ(kind, data, i); if (inShift) { - if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) { + if (ENCODE_DIRECT(ch)) { /* shifting out */ if (base64bits) { /* output remaining bits */ *out++ = TO_BASE64(base64buffer << (6-base64bits)); @@ -4947,7 +4942,7 @@ _PyUnicode_EncodeUTF7(PyObject *str, *out++ = '+'; *out++ = '-'; } - else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) { + else if (ENCODE_DIRECT(ch)) { *out++ = (char) ch; } else { -- 2.47.3