git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset...
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 5 Mar 2024 15:49:01 +0000 (17:49 +0200)
committer GitHub <noreply@github.com>
Tue, 5 Mar 2024 15:49:01 +0000 (17:49 +0200)
Lib/email/generator.py
Lib/email/message.py
Lib/test/test_email/test_email.py
Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst [new file with mode: 0644]

diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index 7ccbe10eb7685677aa8345302c3365194f650d2e..c8056ad47baa0fb32f8d46a47167b90a0ab2bbf1 100644 (file)
@@ -243,7 +243,7 @@ class Generator:
                 # existing message.
                 msg = deepcopy(msg)
                 del msg['content-transfer-encoding']
-                msg.set_payload(payload, charset)
+                msg.set_payload(msg._payload, charset)
                 payload = msg.get_payload()
                 self._munge_cte = (msg['content-transfer-encoding'],
                                    msg['content-type'])
diff --git a/Lib/email/message.py b/Lib/email/message.py
index fe769580fed5d0a5db7c8158ec4b9f4875fb40a9..a14cca56b3745aa9119af291ceeda4f497f18451 100644 (file)
@@ -340,7 +340,7 @@ class Message:
                 return
             if not isinstance(charset, Charset):
                 charset = Charset(charset)
-            payload = payload.encode(charset.output_charset)
+            payload = payload.encode(charset.output_charset, 'surrogateescape')
         if hasattr(payload, 'decode'):
             self._payload = payload.decode('ascii', 'surrogateescape')
         else:
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 39d4ace8d4a1d8c2d101f7376b3fada7f0cc4ff1..d9af05c306eb3035b859641f6cd7736691994ec6 100644 (file)
@@ -337,6 +337,21 @@ class TestMessageAPI(TestEmailBase):
         msg = email.message_from_bytes(source)
         self.assertEqual(msg.as_string(), expected)
 
+    def test_nonascii_as_string_with_ascii_charset(self):
+        m = textwrap.dedent("""\
+            MIME-Version: 1.0
+            Content-type: text/plain; charset="us-ascii"
+            Content-Transfer-Encoding: 8bit
+
+            Test if non-ascii messages with no Content-Transfer-Encoding set
+            can be as_string'd:
+            Föö bär
+            """)
+        source = m.encode('iso-8859-1')
+        expected = source.decode('ascii', 'replace')
+        msg = email.message_from_bytes(source)
+        self.assertEqual(msg.as_string(), expected)
+
     def test_nonascii_as_string_without_content_type_and_cte(self):
         m = textwrap.dedent("""\
             MIME-Version: 1.0
diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
new file mode 100644 (file)
index 0000000..da62f8a
--- /dev/null
@@ -0,0 +1,4 @@
+Fix UnicodeEncodeError in :meth:`email.message.Message.as_string` that results when
+a message that claims to be in the ascii character set actually has non-ascii
+characters. Non-ascii characters are now replaced with the U+FFFD replacement
+character, like in the ``replace`` error handler.