]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-119182: Optimize PyUnicode_FromFormat() (#120796)
authorVictor Stinner <vstinner@python.org>
Thu, 20 Jun 2024 19:06:16 +0000 (21:06 +0200)
committerGitHub <noreply@github.com>
Thu, 20 Jun 2024 19:06:16 +0000 (19:06 +0000)
Use strchr() and ucs1lib_find_max_char() to optimize the code path
formatting sub-strings between '%' formats.

Objects/unicodeobject.c

index 1f8c89dd12a528f6486453f09212eecb4b6b5f8b..e6feed47fbb2bf2c468752a7bdc7f31ba08dea41 100644 (file)
@@ -2875,47 +2875,47 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
 static int
 unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list vargs)
 {
-    writer->min_length += strlen(format) + 100;
+    Py_ssize_t len = strlen(format);
+    writer->min_length += len + 100;
     writer->overallocate = 1;
 
-    va_list vargs2;
-    const char *f;
-
     // Copy varags to be able to pass a reference to a subfunction.
+    va_list vargs2;
     va_copy(vargs2, vargs);
 
-    for (f = format; *f; ) {
+    // _PyUnicodeWriter_WriteASCIIString() below requires the format string
+    // to be encoded to ASCII.
+    int is_ascii = (ucs1lib_find_max_char((Py_UCS1*)format, (Py_UCS1*)format + len) < 128);
+    if (!is_ascii) {
+        Py_ssize_t i;
+        for (i=0; i < len && (unsigned char)format[i] <= 127; i++);
+        PyErr_Format(PyExc_ValueError,
+            "PyUnicode_FromFormatV() expects an ASCII-encoded format "
+            "string, got a non-ASCII byte: 0x%02x",
+            (unsigned char)format[i]);
+        goto fail;
+    }
+
+    for (const char *f = format; *f; ) {
         if (*f == '%') {
             f = unicode_fromformat_arg(writer, f, &vargs2);
             if (f == NULL)
                 goto fail;
         }
         else {
-            const char *p;
-            Py_ssize_t len;
-
-            p = f;
-            do
-            {
-                if ((unsigned char)*p > 127) {
-                    PyErr_Format(PyExc_ValueError,
-                        "PyUnicode_FromFormatV() expects an ASCII-encoded format "
-                        "string, got a non-ASCII byte: 0x%02x",
-                        (unsigned char)*p);
-                    goto fail;
-                }
-                p++;
+            const char *p = strchr(f, '%');
+            if (p != NULL) {
+                len = p - f;
             }
-            while (*p != '\0' && *p != '%');
-            len = p - f;
-
-            if (*p == '\0')
+            else {
+                len = strlen(f);
                 writer->overallocate = 0;
+            }
 
-            if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0)
+            if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0) {
                 goto fail;
-
-            f = p;
+            }
+            f += len;
         }
     }
     va_end(vargs2);