gh-125346: Deprecate accepting standard Base64 alphabet when alternative alphabet...

author Serhiy Storchaka <storchaka@gmail.com>

Wed, 21 Jan 2026 07:41:58 +0000 (09:41 +0200)

committer GitHub <noreply@github.com>

Wed, 21 Jan 2026 07:41:58 +0000 (09:41 +0200)
author Serhiy Storchaka <storchaka@gmail.com>
Wed, 21 Jan 2026 07:41:58 +0000 (09:41 +0200)
committer GitHub <noreply@github.com>
Wed, 21 Jan 2026 07:41:58 +0000 (09:41 +0200)
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst

index 4876117f6403b21358406fb8967cb2675617079a..3e7884debd59485815ef76b804dcd1297d8b8184 100644 (file)
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -84,15 +84,20 @@ POST request.
     A :exc:`binascii.Error` exception is raised
     if *s* is incorrectly padded.
  
-   If *validate* is ``False`` (the default), characters that are neither
+   If *validate* is false (the default), characters that are neither
     in the normal base-64 alphabet nor the alternative alphabet are
-   discarded prior to the padding check.  If *validate* is ``True``,
-   these non-alphabet characters in the input result in a
-   :exc:`binascii.Error`.
+   discarded prior to the padding check, but the ``+`` and ``/`` characters
+   are still decoded as in the standard alphabet if they are not in
+   *altchars* (they will be discarded in future Python versions).
+   If *validate* is true, these non-alphabet characters in the input
+   result in a :exc:`binascii.Error`.
  
     For more information about the strict base64 check, see :func:`binascii.a2b_base64`
  
-   May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2.
+   .. deprecated:: next
+      Accepting the ``+`` and ``/`` characters with an alternative alphabet
+      is now deprecated.
+
  
  .. function:: standard_b64encode(s)
  
@@ -123,6 +128,9 @@ POST request.
     ``/`` in the standard Base64 alphabet, and return the decoded
     :class:`bytes`.
  
+   .. deprecated:: next
+      Accepting the ``+`` and ``/`` characters is now deprecated.
+
  
  .. function:: b32encode(s)
  
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst

index 1dd66065b0f93d31087514eb35be3a4a9d45612e..1d4961d7293631f6d1ce4f4b069066c67f7db7b4 100644 (file)
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -1149,6 +1149,15 @@ Deprecated
  New deprecations
  ----------------
  
+* :mod:`base64`:
+
+  * Accepting the ``+`` and ``/`` characters with an alternative alphabet in
+    :func:`~base64.b64decode` and :func:`~base64.urlsafe_b64decode` is now
+    deprecated.
+    In future Python versions these characters will raise an error in strict
+    mode and will be discarded in non-strict mode.
+    (Contributed by Serhiy Storchaka in :gh:`125346`.)
+
  * CLI:
  
    * Deprecate :option:`-b` and :option:`!-bb` command-line options
diff --git a/Lib/base64.py b/Lib/base64.py

index e62ae6aff580fa5a209ac2bad8f9933e8e88e214..6e0da16b23ce99762cb6a5d1cf0a4f52255bc5ee 100644 (file)
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -72,20 +72,39 @@ def b64decode(s, altchars=None, validate=False):
      The result is returned as a bytes object.  A binascii.Error is raised if
      s is incorrectly padded.
  
-    If validate is False (the default), characters that are neither in the
+    If validate is false (the default), characters that are neither in the
      normal base-64 alphabet nor the alternative alphabet are discarded prior
-    to the padding check.  If validate is True, these non-alphabet characters
+    to the padding check.  If validate is true, these non-alphabet characters
      in the input result in a binascii.Error.
      For more information about the strict base64 check, see:
  
      https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
      """
      s = _bytes_from_decode_data(s)
+    badchar = None
      if altchars is not None:
          altchars = _bytes_from_decode_data(altchars)
-        assert len(altchars) == 2, repr(altchars)
+        if len(altchars) != 2:
+            raise ValueError(f'invalid altchars: {altchars!r}')
+        for b in b'+/':
+            if b not in altchars and b in s:
+                badchar = b
+                break
          s = s.translate(bytes.maketrans(altchars, b'+/'))
-    return binascii.a2b_base64(s, strict_mode=validate)
+    result = binascii.a2b_base64(s, strict_mode=validate)
+    if badchar is not None:
+        import warnings
+        if validate:
+            warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
+                          f'with altchars={altchars!r} and validate=True '
+                          f'will be an error in future Python versions',
+                          DeprecationWarning, stacklevel=2)
+        else:
+            warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
+                          f'with altchars={altchars!r} and validate=False '
+                          f'will be discarded in future Python versions',
+                          FutureWarning, stacklevel=2)
+    return result
  
  
  def standard_b64encode(s):
@@ -130,8 +149,19 @@ def urlsafe_b64decode(s):
      The alphabet uses '-' instead of '+' and '_' instead of '/'.
      """
      s = _bytes_from_decode_data(s)
+    badchar = None
+    for b in b'+/':
+        if b in s:
+            badchar = b
+            break
      s = s.translate(_urlsafe_decode_translation)
-    return b64decode(s)
+    result = binascii.a2b_base64(s, strict_mode=False)
+    if badchar is not None:
+        import warnings
+        warnings.warn(f'invalid character {chr(badchar)!a} in URL-safe Base64 data '
+                      f'will be discarded in future Python versions',
+                      FutureWarning, stacklevel=2)
+    return result
  
  
  
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py

index 120c5824a42a40aaa4d90b903cc415af04e42484..d02992903f15a750566912abd1de44ce161b8b69 100644 (file)
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -292,6 +292,11 @@ class BaseXYTestCase(unittest.TestCase):
              eq(base64.b64decode(data, altchars=altchars_str), res)
              eq(base64.b64decode(data_str, altchars=altchars_str), res)
  
+        self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+')
+        self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+/-')
+        self.assertRaises(ValueError, base64.b64decode, '', altchars='+')
+        self.assertRaises(ValueError, base64.b64decode, '', altchars='+/-')
+
      def test_b64decode_padding_error(self):
          self.assertRaises(binascii.Error, base64.b64decode, b'abc')
          self.assertRaises(binascii.Error, base64.b64decode, 'abc')
@@ -323,13 +328,25 @@ class BaseXYTestCase(unittest.TestCase):
              with self.assertRaises(binascii.Error):
                  base64.b64decode(bstr.decode('ascii'), validate=True)
  
-        # Normal alphabet characters not discarded when alternative given
-        res = b'\xfb\xef\xff'
-        self.assertEqual(base64.b64decode(b'++//', validate=True), res)
-        self.assertEqual(base64.b64decode(b'++//', '-_', validate=True), res)
-        self.assertEqual(base64.b64decode(b'--__', '-_', validate=True), res)
-        self.assertEqual(base64.urlsafe_b64decode(b'++//'), res)
-        self.assertEqual(base64.urlsafe_b64decode(b'--__'), res)
+        # Normal alphabet characters will be discarded when alternative given
+        with self.assertWarns(FutureWarning):
+            self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'),
+                             b'\xfb\xef\xbe')
+        with self.assertWarns(FutureWarning):
+            self.assertEqual(base64.b64decode(b'////', altchars=b'-_'),
+                             b'\xff\xff\xff')
+        with self.assertWarns(DeprecationWarning):
+            self.assertEqual(base64.b64decode(b'++++', altchars=b'-_', validate=True),
+                             b'\xfb\xef\xbe')
+        with self.assertWarns(DeprecationWarning):
+            self.assertEqual(base64.b64decode(b'////', altchars=b'-_', validate=True),
+                             b'\xff\xff\xff')
+        with self.assertWarns(FutureWarning):
+            self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'\xfb\xef\xbe')
+        with self.assertWarns(FutureWarning):
+            self.assertEqual(base64.urlsafe_b64decode(b'////'), b'\xff\xff\xff')
+        with self.assertRaises(binascii.Error):
+            base64.b64decode(b'+/!', altchars=b'-_')
  
      def _altchars_strategy():
          """Generate 'altchars' for base64 encoding."""
diff --git a/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst b/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst

new file mode 100644 (file)

index 0000000..187a6eb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst
@@ -0,0 +1,5 @@
+Accepting ``+`` and ``/`` characters with an alternative alphabet in
+:func:`base64.b64decode` and :func:`base64.urlsafe_b64decode` is now
+deprecated.
+In future Python versions these characters will raise an error in strict
+mode and will be discarded in non-strict mode.
author	Serhiy Storchaka <storchaka@gmail.com>
	Wed, 21 Jan 2026 07:41:58 +0000 (09:41 +0200)
committer	GitHub <noreply@github.com>
	Wed, 21 Jan 2026 07:41:58 +0000 (09:41 +0200)
Doc/library/base64.rst		patch \| blob \| blame \| history
Doc/whatsnew/3.15.rst		patch \| blob \| blame \| history
Lib/base64.py		patch \| blob \| blame \| history
Lib/test/test_base64.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2025-11-06-12-03-29.gh-issue-125346.7Gfpgw.rst	[new file with mode: 0644]	patch \| blob