From: Serhiy Storchaka Date: Sun, 22 Mar 2026 21:12:58 +0000 (+0200) Subject: gh-145264: Do not ignore excess Base64 data after the first padded quad (GH-145267) X-Git-Tag: v3.15.0a8~211 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4561f6418a691b3e89aef0901f53fe0dfb7f7c0e;p=thirdparty%2FPython%2Fcpython.git gh-145264: Do not ignore excess Base64 data after the first padded quad (GH-145267) Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc) no longer ignores excess data after the first padded quad in non-strict (default) mode. Instead, in conformance with RFC 4648, it ignores the pad character, "=", if it is present before the end of the encoded data. --- diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index d4879667c714..1dcd2b25c790 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -274,23 +274,21 @@ class BinASCIITest(unittest.TestCase): def test_base64_excess_data(self): # Test excess data exceptions - def assertExcessData(data, non_strict_expected, - ignore_padchar_expected=None): + def assertExcessData(data, expected): assert_regex = r'(?i)Excess data' data = self.type2test(data) with self.assertRaisesRegex(binascii.Error, assert_regex): binascii.a2b_base64(data, strict_mode=True) self.assertEqual(binascii.a2b_base64(data, strict_mode=False), - non_strict_expected) - if ignore_padchar_expected is not None: - self.assertEqual(binascii.a2b_base64(data, strict_mode=True, - ignorechars=b'='), - ignore_padchar_expected) - self.assertEqual(binascii.a2b_base64(data), non_strict_expected) - - assertExcessData(b'ab==c', b'i') - assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d') - assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d') + expected) + self.assertEqual(binascii.a2b_base64(data, strict_mode=True, + ignorechars=b'='), + expected) + self.assertEqual(binascii.a2b_base64(data), expected) + + assertExcessData(b'ab==c=', b'i\xb7') + assertExcessData(b'ab==cd', b'i\xb7\x1d') + 
assertExcessData(b'abc=d', b'i\xb7\x1d') def test_base64errors(self): # Test base64 with invalid padding diff --git a/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst new file mode 100644 index 000000000000..22d53fe8db11 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst @@ -0,0 +1,4 @@ +Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc) no +longer ignores excess data after the first padded quad in non-strict +(default) mode. Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores +the pad character, "=", if it is present before the end of the encoded data. diff --git a/Modules/binascii.c b/Modules/binascii.c index 7907b74e36f0..a57bf3ee6339 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -800,40 +800,33 @@ fastpath: */ if (this_ch == BASE64_PAD) { pads++; - - if (strict_mode) { - if (quad_pos >= 2 && quad_pos + pads <= 4) { - continue; - } - if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) { - continue; - } - if (quad_pos == 1) { - /* Set an error below. */ - break; - } - state = get_binascii_state(module); - if (state) { - PyErr_SetString(state->Error, - (quad_pos == 0 && ascii_data == data->buf) - ? "Leading padding not allowed" - : "Excess padding not allowed"); - } - goto error_end; + if (quad_pos >= 2 && quad_pos + pads <= 4) { + continue; } - else { - if (quad_pos >= 2 && quad_pos + pads >= 4) { - /* A pad sequence means we should not parse more input. - ** We've already interpreted the data from the quad at this point. - */ - goto done; - } + // See RFC 4648, section-3.3: "specifications MAY ignore the + // pad character, "=", treating it as non-alphabet data, if + // it is present before the end of the encoded data" and + // "the excess pad characters MAY also be ignored." 
+ if (!strict_mode || ignorechar(BASE64_PAD, ignorechars, ignorecache)) { continue; } + if (quad_pos == 1) { + /* Set an error below. */ + break; + } + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, + (quad_pos == 0 && ascii_data == data->buf) + ? "Leading padding not allowed" + : "Excess padding not allowed"); + } + goto error_end; } unsigned char v = table_a2b[this_ch]; if (v >= 64) { + // See RFC 4648, section-3.3. if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) { state = get_binascii_state(module); if (state) { @@ -844,7 +837,8 @@ fastpath: continue; } - // Characters that are not '=', in the middle of the padding, are not allowed + // In strict mode, characters other than '=' in the middle of the padding + // are not allowed unless '=' is in ignorechars. See RFC 4648, section-3.3. if (pads && strict_mode && !ignorechar(BASE64_PAD, ignorechars, ignorecache)) { @@ -908,7 +902,6 @@ fastpath: goto error_end; } -done: Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data);