gh-144001: Support ignorechars in binascii.a2b_base64() and base64.b64decode() (GH...

author Serhiy Storchaka <storchaka@gmail.com>

Mon, 26 Jan 2026 18:11:40 +0000 (20:11 +0200)

committer GitHub <noreply@github.com>

Mon, 26 Jan 2026 18:11:40 +0000 (20:11 +0200)
author Serhiy Storchaka <storchaka@gmail.com>
Mon, 26 Jan 2026 18:11:40 +0000 (20:11 +0200)
committer GitHub <noreply@github.com>
Mon, 26 Jan 2026 18:11:40 +0000 (20:11 +0200)
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst

index 64d66fcf6bd50a98e552e74e2f248098c505a87c..65b8aeaef8e939abbf6cf1c3d3d6ea94d70b502a 100644 (file)
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -73,6 +73,7 @@ POST request.
  
  
  .. function:: b64decode(s, altchars=None, validate=False)
+              b64decode(s, altchars=None, validate=True, *, ignorechars)
  
     Decode the Base64 encoded :term:`bytes-like object` or ASCII string
     *s* and return the decoded :class:`bytes`.
@@ -84,11 +85,17 @@ POST request.
     A :exc:`binascii.Error` exception is raised
     if *s* is incorrectly padded.
  
-   If *validate* is false (the default), characters that are neither
+   If *ignorechars* is specified, it should be a :term:`bytes-like object`
+   containing characters to ignore from the input when *validate* is true.
+   The default value of *validate* is ``True`` if *ignorechars* is specified,
+   ``False`` otherwise.
+
+   If *validate* is false, characters that are neither
     in the normal base-64 alphabet nor the alternative alphabet are
     discarded prior to the padding check, but the ``+`` and ``/`` characters
     keep their meaning if they are not in *altchars* (they will be discarded
     in future Python versions).
+
     If *validate* is true, these non-alphabet characters in the input
     result in a :exc:`binascii.Error`.
  
@@ -99,6 +106,10 @@ POST request.
        is now deprecated.
  
  
+   .. versionchanged:: next
+      Added the *ignorechars* parameter.
+
+
  .. function:: standard_b64encode(s)
  
     Encode :term:`bytes-like object` *s* using the standard Base64 alphabet
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst

index eaf755711bc29274643563b082f4ccb8d63936e1..d9f0baedec85f2ff4ae71f163c632d47c22791bd 100644 (file)
--- a/Doc/library/binascii.rst
+++ b/Doc/library/binascii.rst
@@ -49,10 +49,16 @@ The :mod:`binascii` module defines the following functions:
  
  
  .. function:: a2b_base64(string, /, *, strict_mode=False)
+              a2b_base64(string, /, *, strict_mode=True, ignorechars)
  
     Convert a block of base64 data back to binary and return the binary data. More
     than one line may be passed at a time.
  
+   If *ignorechars* is specified, it should be a :term:`bytes-like object`
+   containing characters to ignore from the input when *strict_mode* is true.
+   The default value of *strict_mode* is ``True`` if *ignorechars* is specified,
+   ``False`` otherwise.
+
     If *strict_mode* is true, only valid base64 data will be converted. Invalid base64
     data will raise :exc:`binascii.Error`.
  
@@ -66,6 +72,9 @@ The :mod:`binascii` module defines the following functions:
     .. versionchanged:: 3.11
        Added the *strict_mode* parameter.
  
+   .. versionchanged:: next
+      Added the *ignorechars* parameter.
+
  
  .. function:: b2a_base64(data, *, wrapcol=0, newline=True)
  
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst

index aec6b1ceea37cf135ff2342da224971644b52153..19c01b71f02fb68a86f114fac84c532ca57b321d 100644 (file)
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -444,6 +444,8 @@ base64
  * Added the *wrapcol* parameter in :func:`~base64.b64encode`.
    (Contributed by Serhiy Storchaka in :gh:`143214`.)
  
+* Added the *ignorechars* parameter in :func:`~base64.b64decode`.
+  (Contributed by Serhiy Storchaka in :gh:`144001`.)
  
  binascii
  --------
@@ -451,6 +453,9 @@ binascii
  * Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`.
    (Contributed by Serhiy Storchaka in :gh:`143214`.)
  
+* Added the *ignorechars* parameter in :func:`~binascii.a2b_base64`.
+  (Contributed by Serhiy Storchaka in :gh:`144001`.)
+
  
  calendar
  --------
diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h

index 4a5b2a925413bfe0b7cd89184b9cb0bc58d1c316..fc297a2933a7865c14ad6f904f08b630371d6cfb 100644 (file)
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -1797,6 +1797,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
      _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ident));
      _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(identity_hint));
      _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignore));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignorechars));
      _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(imag));
      _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(implieslink));
      _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(importlib));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h

index 7c2f44ef6dbe7a13c39277dcc0fa63d86108f605..563ccd7cf6d3f4cdf8dda43ba4feeb9659fc6675 100644 (file)
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -520,6 +520,7 @@ struct _Py_global_strings {
          STRUCT_FOR_ID(ident)
          STRUCT_FOR_ID(identity_hint)
          STRUCT_FOR_ID(ignore)
+        STRUCT_FOR_ID(ignorechars)
          STRUCT_FOR_ID(imag)
          STRUCT_FOR_ID(implieslink)
          STRUCT_FOR_ID(importlib)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h

index 6e7bad986dbeda5f316a85f050b362c03bed3a7b..ba7c0e6843451716414004b6e13f2af82bac3a35 100644 (file)
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -1795,6 +1795,7 @@ extern "C" {
      INIT_ID(ident), \
      INIT_ID(identity_hint), \
      INIT_ID(ignore), \
+    INIT_ID(ignorechars), \
      INIT_ID(imag), \
      INIT_ID(implieslink), \
      INIT_ID(importlib), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h

index 660115931da0a0cbc95b44c3a9fc704e810612d8..44063794293990549805b343a67c0ab8197db35f 100644 (file)
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1860,6 +1860,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
      _PyUnicode_InternStatic(interp, &string);
      assert(_PyUnicode_CheckConsistency(string, 1));
      assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(ignorechars);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
      string = &_Py_ID(imag);
      _PyUnicode_InternStatic(interp, &string);
      assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/base64.py b/Lib/base64.py

index 6e0da16b23ce99762cb6a5d1cf0a4f52255bc5ee..6e9d24f06493207a315f2c0af43066bd77d39f98 100644 (file)
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -26,6 +26,8 @@ __all__ = [
      ]
  
  
+_NOT_SPECIFIED = ['NOT SPECIFIED']
+
  bytes_types = (bytes, bytearray)  # Types acceptable as binary data
  
  def _bytes_from_decode_data(s):
@@ -62,7 +64,7 @@ def b64encode(s, altchars=None, *, wrapcol=0):
      return encoded
  
  
-def b64decode(s, altchars=None, validate=False):
+def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, *, ignorechars=_NOT_SPECIFIED):
      """Decode the Base64 encoded bytes-like object or ASCII string s.
  
      Optional altchars must be a bytes-like object or ASCII string of length 2
@@ -72,38 +74,64 @@ def b64decode(s, altchars=None, validate=False):
      The result is returned as a bytes object.  A binascii.Error is raised if
      s is incorrectly padded.
  
-    If validate is false (the default), characters that are neither in the
-    normal base-64 alphabet nor the alternative alphabet are discarded prior
-    to the padding check.  If validate is true, these non-alphabet characters
-    in the input result in a binascii.Error.
+    If ignorechars is specified, it should be a byte string containing
+    characters to ignore from the input.  The default value of validate is
+    True if ignorechars is specified, False otherwise.
+
+    If validate is false, characters that are neither in the normal base-64
+    alphabet nor the alternative alphabet are discarded prior to the
+    padding check.  If validate is true, these non-alphabet characters in
+    the input result in a binascii.Error if they are not in ignorechars.
      For more information about the strict base64 check, see:
  
      https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
      """
      s = _bytes_from_decode_data(s)
+    if validate is _NOT_SPECIFIED:
+        validate = ignorechars is not _NOT_SPECIFIED
+    if ignorechars is _NOT_SPECIFIED:
+        ignorechars = b''
      badchar = None
+    badchar_strict = False
      if altchars is not None:
          altchars = _bytes_from_decode_data(altchars)
          if len(altchars) != 2:
              raise ValueError(f'invalid altchars: {altchars!r}')
          for b in b'+/':
              if b not in altchars and b in s:
-                badchar = b
-                break
+                if badchar is None:
+                    badchar = b
+                if not validate:
+                    break
+                if not isinstance(ignorechars, (bytes, bytearray)):
+                    ignorechars = memoryview(ignorechars).cast('B')
+                if b not in ignorechars:
+                    badchar_strict = True
+                    badchar = b
+                    break
          s = s.translate(bytes.maketrans(altchars, b'+/'))
-    result = binascii.a2b_base64(s, strict_mode=validate)
+    result = binascii.a2b_base64(s, strict_mode=validate,
+                                 ignorechars=ignorechars)
      if badchar is not None:
          import warnings
-        if validate:
+        if badchar_strict:
              warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
                            f'with altchars={altchars!r} and validate=True '
                            f'will be an error in future Python versions',
                            DeprecationWarning, stacklevel=2)
          else:
-            warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
-                          f'with altchars={altchars!r} and validate=False '
-                          f'will be discarded in future Python versions',
-                          FutureWarning, stacklevel=2)
+            ignorechars = bytes(ignorechars)
+            if ignorechars:
+                warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
+                              f'with altchars={altchars!r} '
+                              f'and ignorechars={ignorechars!r} '
+                              f'will be discarded in future Python versions',
+                              FutureWarning, stacklevel=2)
+            else:
+                warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
+                              f'with altchars={altchars!r} and validate=False '
+                              f'will be discarded in future Python versions',
+                              FutureWarning, stacklevel=2)
      return result
  
  
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py

index 6e69ece8065ea20a18d4d7b0e26fbf406d198d69..5f7a41f53345d219e9cac05db108489e5d4cdbe1 100644 (file)
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -303,22 +303,26 @@ class BaseXYTestCase(unittest.TestCase):
  
      def test_b64decode_invalid_chars(self):
          # issue 1466065: Test some invalid characters.
-        tests = ((b'%3d==', b'\xdd'),
-                 (b'$3d==', b'\xdd'),
-                 (b'[==', b''),
-                 (b'YW]3=', b'am'),
-                 (b'3{d==', b'\xdd'),
-                 (b'3d}==', b'\xdd'),
-                 (b'@@', b''),
-                 (b'!', b''),
-                 (b"YWJj\n", b"abc"),
-                 (b'YWJj\nYWI=', b'abcab'))
+        tests = ((b'%3d==', b'\xdd', b'%$'),
+                 (b'$3d==', b'\xdd', b'%$'),
+                 (b'[==', b'', None),
+                 (b'YW]3=', b'am', b']'),
+                 (b'3{d==', b'\xdd', b'{}'),
+                 (b'3d}==', b'\xdd', b'{}'),
+                 (b'@@', b'', b'@!'),
+                 (b'!', b'', b'@!'),
+                 (b"YWJj\n", b"abc", b'\n'),
+                 (b'YWJj\nYWI=', b'abcab', b'\n'),
+                 (b'YW\nJj', b'abc', b'\n'),
+                 (b'YW\nJj', b'abc', bytearray(b'\n')),
+                 (b'YW\nJj', b'abc', memoryview(b'\n')),
+        )
          funcs = (
              base64.b64decode,
              base64.standard_b64decode,
              base64.urlsafe_b64decode,
          )
-        for bstr, res in tests:
+        for bstr, res, ignorechars in tests:
              for func in funcs:
                  with self.subTest(bstr=bstr, func=func):
                      self.assertEqual(func(bstr), res)
@@ -327,24 +331,76 @@ class BaseXYTestCase(unittest.TestCase):
                  base64.b64decode(bstr, validate=True)
              with self.assertRaises(binascii.Error):
                  base64.b64decode(bstr.decode('ascii'), validate=True)
+            with self.assertRaises(binascii.Error):
+                # Even empty ignorechars enables the strict mode.
+                base64.b64decode(bstr, ignorechars=b'')
+            if ignorechars is not None:
+                r = base64.b64decode(bstr, ignorechars=ignorechars)
+                self.assertEqual(r, res)
+
+        with self.assertRaises(TypeError):
+            base64.b64decode(b'', ignorechars='')
+        with self.assertRaises(TypeError):
+            base64.b64decode(b'', ignorechars=[])
+        with self.assertRaises(TypeError):
+            base64.b64decode(b'', ignorechars=None)
  
          # Normal alphabet characters will be discarded when alternative given
-        with self.assertWarns(FutureWarning):
-            self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'),
-                             b'\xfb\xef\xbe')
-        with self.assertWarns(FutureWarning):
-            self.assertEqual(base64.b64decode(b'////', altchars=b'-_'),
-                             b'\xff\xff\xff')
-        with self.assertWarns(DeprecationWarning):
-            self.assertEqual(base64.b64decode(b'++++', altchars=b'-_', validate=True),
-                             b'\xfb\xef\xbe')
-        with self.assertWarns(DeprecationWarning):
-            self.assertEqual(base64.b64decode(b'////', altchars=b'-_', validate=True),
-                             b'\xff\xff\xff')
-        with self.assertWarns(FutureWarning):
+        discarded = ("invalid character %a in Base64 data with %s "
+                     "will be discarded in future Python versions")
+        error = ("invalid character %a in Base64 data with %s "
+                 "will be an error in future Python versions")
+        with self.assertWarns(FutureWarning) as cm:
+            r = base64.b64decode(b'++++', altchars=b'-_')
+        self.assertEqual(r, b'\xfb\xef\xbe')
+        self.assertEqual(str(cm.warning),
+                         discarded % ('+', "altchars=b'-_' and validate=False"))
+        with self.assertWarns(FutureWarning) as cm:
+            r = base64.b64decode(b'////', altchars=b'-_')
+        self.assertEqual(r, b'\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         discarded % ('/', "altchars=b'-_' and validate=False"))
+        with self.assertWarns(DeprecationWarning) as cm:
+            r = base64.b64decode(b'++++', altchars=b'-_', validate=True)
+        self.assertEqual(r, b'\xfb\xef\xbe')
+        self.assertEqual(str(cm.warning),
+                         error % ('+', "altchars=b'-_' and validate=True"))
+        with self.assertWarns(DeprecationWarning) as cm:
+            r = base64.b64decode(b'////', altchars=b'-_', validate=True)
+        self.assertEqual(r, b'\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         error % ('/', "altchars=b'-_' and validate=True"))
+        with self.assertWarns(FutureWarning) as cm:
+            r = base64.b64decode(b'++++', altchars=b'-_', ignorechars=b'+')
+        self.assertEqual(r, b'\xfb\xef\xbe')
+        self.assertEqual(str(cm.warning),
+                         discarded % ('+', "altchars=b'-_' and ignorechars=b'+'"))
+        with self.assertWarns(FutureWarning) as cm:
+            r = base64.b64decode(b'////', altchars=b'-_', ignorechars=b'/')
+        self.assertEqual(r, b'\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         discarded % ('/', "altchars=b'-_' and ignorechars=b'/'"))
+        with self.assertWarns(DeprecationWarning) as cm:
+            r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'+')
+        self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         error % ('/', "altchars=b'-_' and validate=True"))
+        with self.assertWarns(DeprecationWarning) as cm:
+            r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'/')
+        self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         error % ('+', "altchars=b'-_' and validate=True"))
+
+        with self.assertWarns(FutureWarning) as cm:
              self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'\xfb\xef\xbe')
-        with self.assertWarns(FutureWarning):
+        self.assertEqual(str(cm.warning),
+                         "invalid character '+' in URL-safe Base64 data "
+                         "will be discarded in future Python versions")
+        with self.assertWarns(FutureWarning) as cm:
              self.assertEqual(base64.urlsafe_b64decode(b'////'), b'\xff\xff\xff')
+        self.assertEqual(str(cm.warning),
+                         "invalid character '/' in URL-safe Base64 data "
+                         "will be discarded in future Python versions")
          with self.assertRaises(binascii.Error):
              base64.b64decode(b'+/!', altchars=b'-_')
  
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py

index 47e1e6ab035a176a65b84887d0ce4ed7c9be64ff..4cfc332e89bea850afb7e534b83c86b635006dbe 100644 (file)
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -145,16 +145,16 @@ class BinASCIITest(unittest.TestCase):
  
          # Test excess data exceptions
          assertExcessData(b'ab==a', b'i')
-        assertExcessData(b'ab===', b'i')
-        assertExcessData(b'ab====', b'i')
-        assertExcessData(b'ab==:', b'i')
+        assertExcessPadding(b'ab===', b'i')
+        assertExcessPadding(b'ab====', b'i')
+        assertNonBase64Data(b'ab==:', b'i')
          assertExcessData(b'abc=a', b'i\xb7')
-        assertExcessData(b'abc=:', b'i\xb7')
-        assertExcessData(b'ab==\n', b'i')
-        assertExcessData(b'abc==', b'i\xb7')
-        assertExcessData(b'abc===', b'i\xb7')
-        assertExcessData(b'abc====', b'i\xb7')
-        assertExcessData(b'abc=====', b'i\xb7')
+        assertNonBase64Data(b'abc=:', b'i\xb7')
+        assertNonBase64Data(b'ab==\n', b'i')
+        assertExcessPadding(b'abc==', b'i\xb7')
+        assertExcessPadding(b'abc===', b'i\xb7')
+        assertExcessPadding(b'abc====', b'i\xb7')
+        assertExcessPadding(b'abc=====', b'i\xb7')
  
          # Test non-base64 data exceptions
          assertNonBase64Data(b'\nab==', b'i')
@@ -170,12 +170,45 @@ class BinASCIITest(unittest.TestCase):
          assertLeadingPadding(b'=====', b'')
          assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
          assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
+        assertNonBase64Data(b'ab=:=', b'i')
          assertExcessPadding(b'abcd=', b'i\xb7\x1d')
          assertExcessPadding(b'abcd==', b'i\xb7\x1d')
          assertExcessPadding(b'abcd===', b'i\xb7\x1d')
          assertExcessPadding(b'abcd====', b'i\xb7\x1d')
          assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
  
+    def test_base64_invalidchars(self):
+        def assertNonBase64Data(data, expected, ignorechars):
+            data = self.type2test(data)
+            assert_regex = r'(?i)Only base64 data'
+            self.assertEqual(binascii.a2b_base64(data), expected)
+            with self.assertRaisesRegex(binascii.Error, assert_regex):
+                binascii.a2b_base64(data, strict_mode=True)
+            with self.assertRaisesRegex(binascii.Error, assert_regex):
+                binascii.a2b_base64(data, ignorechars=b'')
+            self.assertEqual(binascii.a2b_base64(data, ignorechars=ignorechars),
+                             expected)
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=False, ignorechars=b''),
+                             expected)
+
+        assertNonBase64Data(b'\nab==', b'i', ignorechars=b'\n')
+        assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&')
+        assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n')
+        assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00')
+        assertNonBase64Data(b'ab==:', b'i', ignorechars=b':')
+        assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':')
+        assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n')
+        assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
+        assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n'))
+        assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n'))
+
+        data = self.type2test(b'a\nb==')
+        with self.assertRaises(TypeError):
+            binascii.a2b_base64(data, ignorechars='')
+        with self.assertRaises(TypeError):
+            binascii.a2b_base64(data, ignorechars=[])
+        with self.assertRaises(TypeError):
+            binascii.a2b_base64(data, ignorechars=None)
  
      def test_base64errors(self):
          # Test base64 with invalid padding
diff --git a/Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst b/Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst

new file mode 100644 (file)

index 0000000..02d453f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst
@@ -0,0 +1,2 @@
+Added the *ignorechars* parameter in :func:`binascii.a2b_base64` and
+:func:`base64.b64decode`.
diff --git a/Modules/binascii.c b/Modules/binascii.c

index c569d3187f2e670cfc9a973088a4a72ced507f98..593b27ac5ede657bee74e8797169353e6eb07ea0 100644 (file)
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -469,32 +469,45 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
      return PyBytesWriter_FinishWithPointer(writer, ascii_data);
  }
  
+
+static int
+ignorechar(unsigned char c, Py_buffer *ignorechars)
+{
+    return (ignorechars->buf != NULL &&
+            memchr(ignorechars->buf, c, ignorechars->len));
+}
+
  /*[clinic input]
-@permit_long_docstring_body
  binascii.a2b_base64
  
      data: ascii_buffer
      /
      *
-    strict_mode: bool = False
+    strict_mode: bool(c_default="-1", py_default="<unrepresentable>") = False
+        When set to true, bytes that are not part of the base64 standard are
+        not allowed.  The same applies to excess data after padding (= / ==).
+        Set to True by default if ignorechars is specified, False otherwise.
+    ignorechars: Py_buffer(py_default="<unrepresentable>") = None
+        A byte string containing characters to ignore from the input when
+        strict_mode is true.
  
  Decode a line of base64 data.
-
-  strict_mode
-    When set to True, bytes that are not part of the base64 standard are not allowed.
-    The same applies to excess data after padding (= / ==).
  [clinic start generated code]*/
  
  static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
-/*[clinic end generated code: output=5409557788d4f975 input=13c797187acc9c40]*/
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
+                         Py_buffer *ignorechars)
+/*[clinic end generated code: output=eab37aea4cfa6daa input=3be4937d72943835]*/
  {
      assert(data->len >= 0);
  
      const unsigned char *ascii_data = data->buf;
      size_t ascii_len = data->len;
      binascii_state *state = NULL;
-    char padding_started = 0;
+
+    if (strict_mode == -1) {
+        strict_mode = (ignorechars->buf != NULL);
+    }
  
      /* Allocate the buffer */
      Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
@@ -504,14 +517,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
      }
      unsigned char *bin_data = PyBytesWriter_GetData(writer);
  
-    if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
-        state = get_binascii_state(module);
-        if (state) {
-            PyErr_SetString(state->Error, "Leading padding not allowed");
-        }
-        goto error_end;
-    }
-
      size_t i = 0;  /* Current position in input */
  
      /* Fast path: use optimized decoder for complete quads.
@@ -538,36 +543,44 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
          ** the invalid ones.
          */
          if (this_ch == BASE64_PAD) {
-            padding_started = 1;
+            pads++;
  
-            if (strict_mode && quad_pos == 0) {
-                state = get_binascii_state(module);
-                if (state) {
-                    PyErr_SetString(state->Error, "Excess padding not allowed");
+            if (strict_mode) {
+                if (quad_pos == 0) {
+                    state = get_binascii_state(module);
+                    if (state) {
+                        PyErr_SetString(state->Error, (i == 0)
+                            ? "Leading padding not allowed"
+                            : "Excess padding not allowed");
+                    }
+                    goto error_end;
                  }
-                goto error_end;
-            }
-            if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
-                /* A pad sequence means we should not parse more input.
-                ** We've already interpreted the data from the quad at this point.
-                ** in strict mode, an error should raise if there's excess data after the padding.
-                */
-                if (strict_mode && i + 1 < ascii_len) {
+                if (quad_pos == 1) {
+                    /* Set an error below. */
+                    break;
+                }
+                if (quad_pos + pads > 4) {
                      state = get_binascii_state(module);
                      if (state) {
-                        PyErr_SetString(state->Error, "Excess data after padding");
+                        PyErr_SetString(state->Error, "Excess padding not allowed");
                      }
                      goto error_end;
                  }
-
-                goto done;
+            }
+            else {
+                if (quad_pos >= 2 && quad_pos + pads >= 4) {
+                    /* A pad sequence means we should not parse more input.
+                    ** We've already interpreted the data from the quad at this point.
+                    */
+                    goto done;
+                }
              }
              continue;
          }
  
-        this_ch = table_a2b_base64[this_ch];
-        if (this_ch >= 64) {
-            if (strict_mode) {
+        unsigned char v = table_a2b_base64[this_ch];
+        if (v >= 64) {
+            if (strict_mode && !ignorechar(this_ch, ignorechars)) {
                  state = get_binascii_state(module);
                  if (state) {
                      PyErr_SetString(state->Error, "Only base64 data is allowed");
@@ -578,10 +591,12 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
          }
  
          // Characters that are not '=', in the middle of the padding, are not allowed
-        if (strict_mode && padding_started) {
+        if (strict_mode && pads) {
              state = get_binascii_state(module);
              if (state) {
-                PyErr_SetString(state->Error, "Discontinuous padding not allowed");
+                PyErr_SetString(state->Error, (quad_pos + pads == 4)
+                    ? "Excess data after padding"
+                    : "Discontinuous padding not allowed");
              }
              goto error_end;
          }
@@ -590,44 +605,46 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
          switch (quad_pos) {
              case 0:
                  quad_pos = 1;
-                leftchar = this_ch;
+                leftchar = v;
                  break;
              case 1:
                  quad_pos = 2;
-                *bin_data++ = (leftchar << 2) | (this_ch >> 4);
-                leftchar = this_ch & 0x0f;
+                *bin_data++ = (leftchar << 2) | (v >> 4);
+                leftchar = v & 0x0f;
                  break;
              case 2:
                  quad_pos = 3;
-                *bin_data++ = (leftchar << 4) | (this_ch >> 2);
-                leftchar = this_ch & 0x03;
+                *bin_data++ = (leftchar << 4) | (v >> 2);
+                leftchar = v & 0x03;
                  break;
              case 3:
                  quad_pos = 0;
-                *bin_data++ = (leftchar << 6) | (this_ch);
+                *bin_data++ = (leftchar << 6) | (v);
                  leftchar = 0;
                  break;
          }
      }
  
-    if (quad_pos != 0) {
+    if (quad_pos == 1) {
+        /* There is exactly one extra valid, non-padding, base64 character.
+        ** This is an invalid length, as there is no possible input that
+        ** could be encoded into such a base64 string.
+        */
          state = get_binascii_state(module);
-        if (state == NULL) {
-            /* error already set, from get_binascii_state */
-            assert(PyErr_Occurred());
-        } else if (quad_pos == 1) {
-            /*
-            ** There is exactly one extra valid, non-padding, base64 character.
-            ** This is an invalid length, as there is no possible input that
-            ** could encoded into such a base64 string.
-            */
+        if (state) {
              unsigned char *bin_data_start = PyBytesWriter_GetData(writer);
              PyErr_Format(state->Error,
                           "Invalid base64-encoded string: "
                           "number of data characters (%zd) cannot be 1 more "
                           "than a multiple of 4",
                           (bin_data - bin_data_start) / 3 * 4 + 1);
-        } else {
+        }
+        goto error_end;
+    }
+
+    if (quad_pos != 0 && quad_pos + pads != 4) {
+        state = get_binascii_state(module);
+        if (state) {
              PyErr_SetString(state->Error, "Incorrect padding");
          }
          goto error_end;
diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h

index 524f5fc93d0c21a843bf8c77c24e10282d4e5e7a..91325b1bdddf89611de884b73357dda416df9aab 100644 (file)
--- a/Modules/clinic/binascii.c.h
+++ b/Modules/clinic/binascii.c.h
@@ -116,20 +116,26 @@ exit:
  }
  
  PyDoc_STRVAR(binascii_a2b_base64__doc__,
-"a2b_base64($module, data, /, *, strict_mode=False)\n"
+"a2b_base64($module, data, /, *, strict_mode=<unrepresentable>,\n"
+"           ignorechars=<unrepresentable>)\n"
  "--\n"
  "\n"
  "Decode a line of base64 data.\n"
  "\n"
  "  strict_mode\n"
-"    When set to True, bytes that are not part of the base64 standard are not allowed.\n"
-"    The same applies to excess data after padding (= / ==).");
+"    When set to true, bytes that are not part of the base64 standard are\n"
+"    not allowed.  The same applies to excess data after padding (= / ==).\n"
+"    Set to True by default if ignorechars is specified, False otherwise.\n"
+"  ignorechars\n"
+"    A byte string containing characters to ignore from the input when\n"
+"    strict_mode is true.");
  
  #define BINASCII_A2B_BASE64_METHODDEF    \
      {"a2b_base64", _PyCFunction_CAST(binascii_a2b_base64), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__},
  
  static PyObject *
-binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode);
+binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
+                         Py_buffer *ignorechars);
  
  static PyObject *
  binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
@@ -137,7 +143,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
      PyObject *return_value = NULL;
      #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
  
-    #define NUM_KEYWORDS 1
+    #define NUM_KEYWORDS 2
      static struct {
          PyGC_Head _this_is_not_used;
          PyObject_VAR_HEAD
@@ -146,7 +152,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
      } _kwtuple = {
          .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
          .ob_hash = -1,
-        .ob_item = { &_Py_ID(strict_mode), },
+        .ob_item = { &_Py_ID(strict_mode), &_Py_ID(ignorechars), },
      };
      #undef NUM_KEYWORDS
      #define KWTUPLE (&_kwtuple.ob_base.ob_base)
@@ -155,17 +161,18 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
      #  define KWTUPLE NULL
      #endif  // !Py_BUILD_CORE
  
-    static const char * const _keywords[] = {"", "strict_mode", NULL};
+    static const char * const _keywords[] = {"", "strict_mode", "ignorechars", NULL};
      static _PyArg_Parser _parser = {
          .keywords = _keywords,
          .fname = "a2b_base64",
          .kwtuple = KWTUPLE,
      };
      #undef KWTUPLE
-    PyObject *argsbuf[2];
+    PyObject *argsbuf[3];
      Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
      Py_buffer data = {NULL, NULL};
-    int strict_mode = 0;
+    int strict_mode = -1;
+    Py_buffer ignorechars = {NULL, NULL};
  
      args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
              /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
@@ -178,17 +185,29 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
      if (!noptargs) {
          goto skip_optional_kwonly;
      }
-    strict_mode = PyObject_IsTrue(args[1]);
-    if (strict_mode < 0) {
+    if (args[1]) {
+        strict_mode = PyObject_IsTrue(args[1]);
+        if (strict_mode < 0) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
+    }
+    if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) {
          goto exit;
      }
  skip_optional_kwonly:
-    return_value = binascii_a2b_base64_impl(module, &data, strict_mode);
+    return_value = binascii_a2b_base64_impl(module, &data, strict_mode, &ignorechars);
  
  exit:
      /* Cleanup for data */
      if (data.obj)
         PyBuffer_Release(&data);
+    /* Cleanup for ignorechars */
+    if (ignorechars.obj) {
+       PyBuffer_Release(&ignorechars);
+    }
  
      return return_value;
  }
@@ -823,4 +842,4 @@ exit:
  
      return return_value;
  }
-/*[clinic end generated code: output=644ccdc8e0d56e65 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=13f0a4b0f5d3fcb4 input=a9049054013a1b77]*/
author	Serhiy Storchaka <storchaka@gmail.com>
	Mon, 26 Jan 2026 18:11:40 +0000 (20:11 +0200)
committer	GitHub <noreply@github.com>
	Mon, 26 Jan 2026 18:11:40 +0000 (20:11 +0200)
Doc/library/base64.rst		patch \| blob \| blame \| history
Doc/library/binascii.rst		patch \| blob \| blame \| history
Doc/whatsnew/3.15.rst		patch \| blob \| blame \| history
Include/internal/pycore_global_objects_fini_generated.h		patch \| blob \| blame \| history
Include/internal/pycore_global_strings.h		patch \| blob \| blame \| history
Include/internal/pycore_runtime_init_generated.h		patch \| blob \| blame \| history
Include/internal/pycore_unicodeobject_generated.h		patch \| blob \| blame \| history
Lib/base64.py		patch \| blob \| blame \| history
Lib/test/test_base64.py		patch \| blob \| blame \| history
Lib/test/test_binascii.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2026-01-19-10-26-59.gh-issue-144001.dGj8Nk.rst	[new file with mode: 0644]	patch \| blob
Modules/binascii.c		patch \| blob \| blame \| history
Modules/clinic/binascii.c.h		patch \| blob \| blame \| history