git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-81074: Allow non-ASCII addr_spec in email.headerregistry.Address (#122477)
author: Mike Edmunds <medmunds@gmail.com>
Fri, 1 May 2026 18:00:46 +0000 (11:00 -0700)
committer: GitHub <noreply@github.com>
Fri, 1 May 2026 18:00:46 +0000 (14:00 -0400)
The email.headerregistry.Address constructor raised an error if
addr_spec contained a non-ASCII character. (But it fully supports
non-ASCII in the separate username and domain args.) This change
removes the error for a non-ASCII addr_spec, as well as the
Defect that triggered it.  In the Unicode era non-ASCII is not a
defect, though it is an error when an attempt is made to serialize
it to ASCII.  The serialization issue was handled in #122540.

Lib/email/_header_value_parser.py
Lib/email/errors.py
Lib/test/test_email/test__header_value_parser.py
Lib/test/test_email/test_headerregistry.py
Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst [new file with mode: 0644]

index 26b6e26ae652fa6724f07d13af4d0f9b0d999d22..9873958f5c2790c1614a8caeacde27ec76d80169 100644 (file)
@@ -1503,11 +1503,6 @@ def get_local_part(value):
             local_part.defects.append(errors.ObsoleteHeaderDefect(
                 "local-part is not a dot-atom (contains CFWS)"))
         local_part[0] = obs_local_part
-    try:
-        local_part.value.encode('ascii')
-    except UnicodeEncodeError:
-        local_part.defects.append(errors.NonASCIILocalPartDefect(
-                "local-part contains non-ASCII characters)"))
     return local_part, value
 
 def get_obs_local_part(value):
index 6bc744bd59c5bb4d10512c39ef448fbd51019050..859307dd85be111ff2f5953781654fd81ef46f9d 100644 (file)
@@ -109,9 +109,9 @@ class ObsoleteHeaderDefect(HeaderDefect):
     """Header uses syntax declared obsolete by RFC 5322"""
 
 class NonASCIILocalPartDefect(HeaderDefect):
-    """local_part contains non-ASCII characters"""
-    # This defect only occurs during unicode parsing, not when
-    # parsing messages decoded from binary.
+    """Unused. Note: this error is deprecated and may be removed in the future."""
+    # RFC 6532 permits a non-ASCII local-part. _header_value_parser previously
+    # treated this as a parse-time defect (when parsing Unicode, but not bytes).
 
 class InvalidDateDefect(HeaderDefect):
     """Header has unparsable or invalid date"""
index bc698759614c36104ae0662e3d6f7e84220bb9b0..aded44e85ee3368d00736c77c4da4232fcd751c4 100644 (file)
@@ -1235,17 +1235,6 @@ class TestParser(TestParserMixin, TestEmailBase):
             '@example.com')
         self.assertEqual(local_part.local_part, r'\example\\ example')
 
-    def test_get_local_part_unicode_defect(self):
-        # Currently this only happens when parsing unicode, not when parsing
-        # stuff that was originally binary.
-        local_part = self._test_get_x(parser.get_local_part,
-            'exámple@example.com',
-            'exámple',
-            'exámple',
-            [errors.NonASCIILocalPartDefect],
-            '@example.com')
-        self.assertEqual(local_part.local_part, 'exámple')
-
     # get_dtext
 
     def test_get_dtext_only(self):
index 2aaa7d68ca3fe17a7e5526e4e5aa6c02555f3a68..aa918255d15c37e2ed0f9a232488257aeb4b10fa 100644 (file)
@@ -1543,17 +1543,19 @@ class TestAddressAndGroup(TestEmailBase):
         self.assertEqual(str(a), '"Sara J." <"bad name"@example.com>')
 
     def test_il8n(self):
-        a = Address('Éric', 'wok', 'exàmple.com')
+        a = Address('Éric', 'wők', 'exàmple.com')
         self.assertEqual(a.display_name, 'Éric')
-        self.assertEqual(a.username, 'wok')
+        self.assertEqual(a.username, 'wők')
         self.assertEqual(a.domain, 'exàmple.com')
-        self.assertEqual(a.addr_spec, 'wok@exàmple.com')
-        self.assertEqual(str(a), 'Éric <wok@exàmple.com>')
+        self.assertEqual(a.addr_spec, 'wők@exàmple.com')
+        self.assertEqual(str(a), 'Éric <wők@exàmple.com>')
 
-    # XXX: there is an API design issue that needs to be solved here.
-    #def test_non_ascii_username_raises(self):
-    #    with self.assertRaises(ValueError):
-    #        Address('foo', 'wők', 'example.com')
+    def test_i18n_in_addr_spec(self):
+        a = Address(addr_spec='wők@exàmple.com')
+        self.assertEqual(a.username, 'wők')
+        self.assertEqual(a.domain, 'exàmple.com')
+        self.assertEqual(a.addr_spec, 'wők@exàmple.com')
+        self.assertEqual(str(a), 'wők@exàmple.com')
 
     def test_crlf_in_constructor_args_raises(self):
         cases = (
@@ -1574,10 +1576,6 @@ class TestAddressAndGroup(TestEmailBase):
             with self.subTest(kwargs=kwargs), self.assertRaisesRegex(ValueError, "invalid arguments"):
                 Address(**kwargs)
 
-    def test_non_ascii_username_in_addr_spec_raises(self):
-        with self.assertRaises(ValueError):
-            Address('foo', addr_spec='wők@example.com')
-
     def test_address_addr_spec_and_username_raises(self):
         with self.assertRaises(TypeError):
             Address('foo', username='bing', addr_spec='bar@baz')
diff --git a/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst b/Misc/NEWS.d/next/Library/2024-07-30-19-19-33.gh-issue-81074.YAeWNf.rst
new file mode 100644 (file)
index 0000000..87de4fa
--- /dev/null
@@ -0,0 +1,8 @@
+The :mod:`email` module no longer treats email addresses with non-ASCII
+characters as defects when parsing a Unicode string or in the ``addr_spec``
+parameter to :class:`email.headerregistry.Address`. :rfc:`6532` permits such
+addresses, and they were already supported when parsing bytes and in the Address
+``username`` parameter.
+
+The (undocumented) :exc:`!email.errors.NonASCIILocalPartDefect` is no longer
+used and should be considered deprecated.