gh-136702: Deprecate passing non-ascii *encoding* (str) to `encodings.normalize_encoding`

author Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>

Sun, 9 Nov 2025 12:37:34 +0000 (12:37 +0000)

committer GitHub <noreply@github.com>

Sun, 9 Nov 2025 12:37:34 +0000 (13:37 +0100)
author Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Sun, 9 Nov 2025 12:37:34 +0000 (12:37 +0000)
committer GitHub <noreply@github.com>
Sun, 9 Nov 2025 12:37:34 +0000 (13:37 +0100)
diff --git a/Doc/deprecations/pending-removal-in-3.17.rst b/Doc/deprecations/pending-removal-in-3.17.rst

index 0a1c2f08cab3bd795617c1a6a7a5a7dbf92a0af1..e769c9d371e133c9b5b372f95b8eb7aff1937db5 100644 (file)
--- a/Doc/deprecations/pending-removal-in-3.17.rst
+++ b/Doc/deprecations/pending-removal-in-3.17.rst
@@ -23,6 +23,12 @@ Pending removal in Python 3.17
      (Contributed by Shantanu Jain in :gh:`91896`.)
  
  
+* :mod:`encodings`:
+
+  - Passing non-ASCII *encoding* names to :func:`encodings.normalize_encoding`
+    is deprecated and scheduled for removal in Python 3.17.
+    (Contributed by Stan Ulbrych in :gh:`136702`.)
+
  * :mod:`typing`:
  
    - Before Python 3.14, old-style unions were implemented using the private class
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py

index 91243378dc04416622bbd0ade23effea6c90f274..c7f665b3990512a2c78e6381d52aa696e86bdf9e 100644 (file)
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -796,6 +796,10 @@ class MimeParameters(TokenList):
                          value = urllib.parse.unquote(value, encoding='latin-1')
                      else:
                          try:
+                            # Explicitly look up the codec for warning generation, see gh-140030
+                            # Can be removed in 3.17
+                            import codecs
+                            codecs.lookup(charset)
                              value = value.decode(charset, 'surrogateescape')
                          except (LookupError, UnicodeEncodeError):
                              # XXX: there should really be a custom defect for
diff --git a/Lib/email/utils.py b/Lib/email/utils.py

index 3de1f0d24a15b0aa032c6b4a907dcf6ffd371ee9..d4824dc3601b2dd6f24c46a64d143cfaaa0cf88b 100644 (file)
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -460,6 +460,10 @@ def collapse_rfc2231_value(value, errors='replace',
          charset = fallback_charset
      rawbytes = bytes(text, 'raw-unicode-escape')
      try:
+        # Explicitly look up the codec for warning generation, see gh-140030
+        # Can be removed in 3.17
+        import codecs
+        codecs.lookup(charset)
          return str(rawbytes, charset, errors)
      except LookupError:
          # charset is not a known codec.
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py

index e7e4ca3358e0f92ba89e7604cd3f018d2a8a5141..e205ec326376d8134ba8d1ec3ca15a16c224e1d1 100644 (file)
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -26,7 +26,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
  
-"""#"
+"""
  
  import codecs
  import sys
@@ -56,6 +56,12 @@ def normalize_encoding(encoding):
      if isinstance(encoding, bytes):
          encoding = str(encoding, "ascii")
  
+    if not encoding.isascii():
+        import warnings
+        warnings.warn(
+            "Support for non-ascii encoding names will be removed in 3.17",
+            DeprecationWarning, stacklevel=2)
+
      return _normalize_encoding(encoding)
  
  def search_function(encoding):
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py

index c35a4508943506ef68731aec6c4ab27ef53c2b5c..f1f0ac5ad36fd2acc620ee4d2359d8b31dd6242f 100644 (file)
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3886,15 +3886,14 @@ class CodecNameNormalizationTest(unittest.TestCase):
          self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa-8', 2, 3, 4))
          self.assertEqual(codecs.lookup('TEST.AAA---8'), ('test.aaa---8', 2, 3, 4))
          self.assertEqual(codecs.lookup('TEST.AAA   8'), ('test.aaa---8', 2, 3, 4))
-        self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
          self.assertEqual(codecs.lookup('TEST.AAA.8'), ('test.aaa.8', 2, 3, 4))
          self.assertEqual(codecs.lookup('TEST.AAA...8'), ('test.aaa...8', 2, 3, 4))
+        with self.assertWarns(DeprecationWarning):
+            self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
  
      def test_encodings_normalize_encoding(self):
-        # encodings.normalize_encoding() ignores non-ASCII characters.
          normalize = encodings.normalize_encoding
          self.assertEqual(normalize('utf_8'), 'utf_8')
-        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
          self.assertEqual(normalize('utf   8'), 'utf_8')
          # encodings.normalize_encoding() doesn't convert
          # characters to lower case.
@@ -3902,6 +3901,11 @@ class CodecNameNormalizationTest(unittest.TestCase):
          self.assertEqual(normalize('utf.8'), 'utf.8')
          self.assertEqual(normalize('utf...8'), 'utf...8')
  
+        # Non-ASCII *encoding* is deprecated.
+        with self.assertWarnsRegex(DeprecationWarning,
+                "Support for non-ascii encoding names will be removed in 3.17"):
+            self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+
  
  if __name__ == "__main__":
      unittest.main()
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py

index 4cd587bcd7604076c753f0a1c160cffe41d7fc9e..1900adf463befcc1f69b264a8387d689af8fb1a9 100644 (file)
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -5738,7 +5738,8 @@ Content-Disposition: inline; filename*=utf-8\udce2\udc80\udc9d''myfile.txt
  
  """
          msg = email.message_from_string(m)
-        self.assertEqual(msg.get_filename(), 'myfile.txt')
+        with self.assertWarns(DeprecationWarning):
+            self.assertEqual(msg.get_filename(), 'myfile.txt')
  
      def test_rfc2231_single_tick_in_filename_extended(self):
          eq = self.assertEqual
diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py

index ff7a6da644d57219d0393ea35868f83fd79293c8..1d0d0a49a829174777513d77721990e368275038 100644 (file)
--- a/Lib/test/test_email/test_headerregistry.py
+++ b/Lib/test/test_email/test_headerregistry.py
@@ -247,7 +247,15 @@ class TestContentTypeHeader(TestHeaderBase):
          decoded =  args[2] if l>2 and args[2] is not DITTO else source
          header = 'Content-Type:' + ' ' if source else ''
          folded = args[3] if l>3 else header + decoded + '\n'
-        h = self.make_header('Content-Type', source)
+        # Both rfc2231 test cases with utf-8%E2%80%9D raise warnings;
+        # clear the encodings cache first to ensure test isolation.
+        if 'utf-8%E2%80%9D' in source and 'ascii' not in source:
+            import encodings
+            encodings._cache.clear()
+            with self.assertWarns(DeprecationWarning):
+                h = self.make_header('Content-Type', source)
+        else:
+            h = self.make_header('Content-Type', source)
          self.assertEqual(h.content_type, content_type)
          self.assertEqual(h.maintype, maintype)
          self.assertEqual(h.subtype, subtype)
diff --git a/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst b/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst

new file mode 100644 (file)

index 0000000..88303f0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst
@@ -0,0 +1,3 @@
+:mod:`encodings`: Deprecate passing a non-ASCII *encoding* name to
+:func:`encodings.normalize_encoding`; support is scheduled for removal in
+Python 3.17.
author	Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
	Sun, 9 Nov 2025 12:37:34 +0000 (12:37 +0000)
committer	GitHub <noreply@github.com>
	Sun, 9 Nov 2025 12:37:34 +0000 (13:37 +0100)
Doc/deprecations/pending-removal-in-3.17.rst		patch \| blob \| blame \| history
Lib/email/_header_value_parser.py		patch \| blob \| blame \| history
Lib/email/utils.py		patch \| blob \| blame \| history
Lib/encodings/__init__.py		patch \| blob \| blame \| history
Lib/test/test_codecs.py		patch \| blob \| blame \| history
Lib/test/test_email/test_email.py		patch \| blob \| blame \| history
Lib/test/test_email/test_headerregistry.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst	[new file with mode: 0644]	patch \| blob