git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-145264: Do not ignore excess Base64 data after the first padded quad (GH-145267)
author: Serhiy Storchaka <storchaka@gmail.com>
date: Sun, 22 Mar 2026 21:12:58 +0000 (23:12 +0200)
committer: GitHub <noreply@github.com>
date: Sun, 22 Mar 2026 21:12:58 +0000 (23:12 +0200)
Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc)
no longer ignores excess data after the first padded quad in non-strict
(default) mode.  Instead, in conformance with RFC 4648, it ignores the
pad character, "=", if it is present before the end of the encoded data.

Lib/test/test_binascii.py
Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst [new file with mode: 0644]
Modules/binascii.c

index d4879667c71461dbc4dd582c2d54295a03b09e3c..1dcd2b25c79087547c0fb324f4fe23626dc6b87a 100644 (file)
@@ -274,23 +274,21 @@ class BinASCIITest(unittest.TestCase):
 
     def test_base64_excess_data(self):
         # Test excess data exceptions
-        def assertExcessData(data, non_strict_expected,
-                             ignore_padchar_expected=None):
+        def assertExcessData(data, expected):
             assert_regex = r'(?i)Excess data'
             data = self.type2test(data)
             with self.assertRaisesRegex(binascii.Error, assert_regex):
                 binascii.a2b_base64(data, strict_mode=True)
             self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
-                             non_strict_expected)
-            if ignore_padchar_expected is not None:
-                self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
-                                                     ignorechars=b'='),
-                                 ignore_padchar_expected)
-            self.assertEqual(binascii.a2b_base64(data), non_strict_expected)
-
-        assertExcessData(b'ab==c', b'i')
-        assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
-        assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
+                             expected)
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
+                                                 ignorechars=b'='),
+                             expected)
+            self.assertEqual(binascii.a2b_base64(data), expected)
+
+        assertExcessData(b'ab==c=', b'i\xb7')
+        assertExcessData(b'ab==cd', b'i\xb7\x1d')
+        assertExcessData(b'abc=d', b'i\xb7\x1d')
 
     def test_base64errors(self):
         # Test base64 with invalid padding
diff --git a/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
new file mode 100644 (file)
index 0000000..22d53fe
--- /dev/null
@@ -0,0 +1,4 @@
+Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc) no
+longer ignores excess data after the first padded quad in non-strict
+(default) mode.  Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores
+the pad character, "=", if it is present before the end of the encoded data.
index 7907b74e36f085c9cbf8dc961850d0030a66b031..a57bf3ee6339f533c407ada56b537d579a9519c9 100644 (file)
@@ -800,40 +800,33 @@ fastpath:
         */
         if (this_ch == BASE64_PAD) {
             pads++;
-
-            if (strict_mode) {
-                if (quad_pos >= 2 && quad_pos + pads <= 4) {
-                    continue;
-                }
-                if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
-                    continue;
-                }
-                if (quad_pos == 1) {
-                    /* Set an error below. */
-                    break;
-                }
-                state = get_binascii_state(module);
-                if (state) {
-                    PyErr_SetString(state->Error,
-                                    (quad_pos == 0 && ascii_data == data->buf)
-                                    ? "Leading padding not allowed"
-                                    : "Excess padding not allowed");
-                }
-                goto error_end;
+            if (quad_pos >= 2 && quad_pos + pads <= 4) {
+                continue;
             }
-            else {
-                if (quad_pos >= 2 && quad_pos + pads >= 4) {
-                    /* A pad sequence means we should not parse more input.
-                    ** We've already interpreted the data from the quad at this point.
-                    */
-                    goto done;
-                }
+            // See RFC 4648, section-3.3: "specifications MAY ignore the
+            // pad character, "=", treating it as non-alphabet data, if
+            // it is present before the end of the encoded data" and
+            // "the excess pad characters MAY also be ignored."
+            if (!strict_mode || ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
                 continue;
             }
+            if (quad_pos == 1) {
+                /* Set an error below. */
+                break;
+            }
+            state = get_binascii_state(module);
+            if (state) {
+                PyErr_SetString(state->Error,
+                                (quad_pos == 0 && ascii_data == data->buf)
+                                ? "Leading padding not allowed"
+                                : "Excess padding not allowed");
+            }
+            goto error_end;
         }
 
         unsigned char v = table_a2b[this_ch];
         if (v >= 64) {
+            // See RFC 4648, section-3.3.
             if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) {
                 state = get_binascii_state(module);
                 if (state) {
@@ -844,7 +837,8 @@ fastpath:
             continue;
         }
 
-        // Characters that are not '=', in the middle of the padding, are not allowed
+        // In strict mode, non-'=' characters in the middle of the padding are
+        // rejected unless '=' is in ignorechars. See RFC 4648, section-3.3.
         if (pads && strict_mode &&
             !ignorechar(BASE64_PAD, ignorechars, ignorecache))
         {
@@ -908,7 +902,6 @@ fastpath:
         goto error_end;
     }
 
-done:
     Py_XDECREF(table_obj);
     return PyBytesWriter_FinishWithPointer(writer, bin_data);