gh-139156: Optimize the UTF-7 encoder (#139253)

author Victor Stinner <vstinner@python.org>

Wed, 24 Sep 2025 15:57:29 +0000 (17:57 +0200)

committer GitHub <noreply@github.com>

Wed, 24 Sep 2025 15:57:29 +0000 (17:57 +0200)
author Victor Stinner <vstinner@python.org>
Wed, 24 Sep 2025 15:57:29 +0000 (17:57 +0200)
committer GitHub <noreply@github.com>
Wed, 24 Sep 2025 15:57:29 +0000 (17:57 +0200)
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h

index 8dfcaedd5ef2e8341ec253eeb2846a8d38ad97a9..c85c01da89a2ffb6e517e84d8f40ff801ed37846 100644 (file)
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -92,8 +92,6 @@ extern int _PyUnicodeWriter_FormatV(
  
  extern PyObject* _PyUnicode_EncodeUTF7(
      PyObject *unicode,          /* Unicode object */
-    int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
-    int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
      const char *errors);        /* error handling */
  
  /* --- UTF-8 Codecs ------------------------------------------------------- */
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c

index 33e262f2ba1e65dd6b7327bf3c0338edfa24728f..bdffeced7da5a98315fd1dbdd6d7cd3bfdf07b49 100644 (file)
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -671,7 +671,7 @@ _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
                            const char *errors)
  /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
  {
-    return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
+    return codec_tuple(_PyUnicode_EncodeUTF7(str, errors),
                         PyUnicode_GET_LENGTH(str));
  }
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 9c00e22ea24bd021bbee4a6bc4030e5aa67a8fdb..5f6384afd1b209a9178a9bfdeec8341a0ed56c60 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4670,15 +4670,12 @@ char utf7_category[128] = {
  
  /* ENCODE_DIRECT: this character should be encoded as itself.  The
   * answer depends on whether we are encoding set O as itself, and also
- * on whether we are encoding whitespace as itself.  RFC2152 makes it
+ * on whether we are encoding whitespace as itself.  RFC 2152 makes it
   * clear that the answers to these questions vary between
   * applications, so this code needs to be flexible.  */
  
-#define ENCODE_DIRECT(c, directO, directWS)             \
-    ((c) < 128 && (c) > 0 &&                            \
-     ((utf7_category[(c)] == 0) ||                      \
-      (directWS && (utf7_category[(c)] == 2)) ||        \
-      (directO && (utf7_category[(c)] == 1))))
+#define ENCODE_DIRECT(c) \
+    ((c) < 128 && (c) > 0 && ((utf7_category[(c)] != 3)))
  
  PyObject *
  PyUnicode_DecodeUTF7(const char *s,
@@ -4895,8 +4892,6 @@ utf7Error:
  
  PyObject *
  _PyUnicode_EncodeUTF7(PyObject *str,
-                      int base64SetO,
-                      int base64WhiteSpace,
                        const char *errors)
  {
      Py_ssize_t len = PyUnicode_GET_LENGTH(str);
@@ -4923,7 +4918,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
          Py_UCS4 ch = PyUnicode_READ(kind, data, i);
  
          if (inShift) {
-            if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
+            if (ENCODE_DIRECT(ch)) {
                  /* shifting out */
                  if (base64bits) { /* output remaining bits */
                      *out++ = TO_BASE64(base64buffer << (6-base64bits));
@@ -4947,7 +4942,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
                  *out++ = '+';
                          *out++ = '-';
              }
-            else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
+            else if (ENCODE_DIRECT(ch)) {
                  *out++ = (char) ch;
              }
              else {
author	Victor Stinner <vstinner@python.org>
	Wed, 24 Sep 2025 15:57:29 +0000 (17:57 +0200)
committer	GitHub <noreply@github.com>
	Wed, 24 Sep 2025 15:57:29 +0000 (17:57 +0200)
Include/internal/pycore_unicodeobject.h		patch | blob | blame | history
Modules/_codecsmodule.c		patch | blob | blame | history
Objects/unicodeobject.c		patch | blob | blame | history