git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.11] gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII chars...
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 5 Mar 2024 17:32:30 +0000 (18:32 +0100)
committer: GitHub <noreply@github.com>
Tue, 5 Mar 2024 17:32:30 +0000 (17:32 +0000)
(cherry picked from commit f97f25ef5dfcdfec0d9a359fd970abd139cf3428)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Lib/email/generator.py
Lib/email/message.py
Lib/test/test_email/test_email.py
Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst [new file with mode: 0644]

index b8c10917a5d98cf1a845137bcaeecaaa24d08ab0..eb597de76d42efe27326243066f19b9cc1c57024 100644 (file)
@@ -243,7 +243,7 @@ class Generator:
                 # existing message.
                 msg = deepcopy(msg)
                 del msg['content-transfer-encoding']
-                msg.set_payload(payload, charset)
+                msg.set_payload(msg._payload, charset)
                 payload = msg.get_payload()
                 self._munge_cte = (msg['content-transfer-encoding'],
                                    msg['content-type'])
index 4e9536b8563e3586e762ef820a3fdb596e28e465..492a6b9a4309fafd1a025e2fa68fdb8770ece105 100644 (file)
@@ -340,7 +340,7 @@ class Message:
                 return
             if not isinstance(charset, Charset):
                 charset = Charset(charset)
-            payload = payload.encode(charset.output_charset)
+            payload = payload.encode(charset.output_charset, 'surrogateescape')
         if hasattr(payload, 'decode'):
             self._payload = payload.decode('ascii', 'surrogateescape')
         else:
index 677f2094b835f3540775a565cdb70b517ce2143f..785696e5c541fb80d7bbdd1bfb56bd190da45d81 100644 (file)
@@ -337,6 +337,21 @@ class TestMessageAPI(TestEmailBase):
         msg = email.message_from_bytes(source)
         self.assertEqual(msg.as_string(), expected)
 
+    def test_nonascii_as_string_with_ascii_charset(self):
+        m = textwrap.dedent("""\
+            MIME-Version: 1.0
+            Content-type: text/plain; charset="us-ascii"
+            Content-Transfer-Encoding: 8bit
+
+            Test if non-ascii messages with no Content-Transfer-Encoding set
+            can be as_string'd:
+            Föö bär
+            """)
+        source = m.encode('iso-8859-1')
+        expected = source.decode('ascii', 'replace')
+        msg = email.message_from_bytes(source)
+        self.assertEqual(msg.as_string(), expected)
+
     def test_nonascii_as_string_without_content_type_and_cte(self):
         m = textwrap.dedent("""\
             MIME-Version: 1.0
diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst
new file mode 100644 (file)
index 0000000..da62f8a
--- /dev/null
@@ -0,0 +1,4 @@
+Fix UnicodeEncodeError in :meth:`email.message.Message.as_string` that results when
+a message that claims to be in the ascii character set actually has non-ascii
+characters. Non-ascii characters are now replaced with the U+FFFD replacement
+character, like in the ``replace`` error handler.