From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Tue, 5 Mar 2024 17:27:22 +0000 (+0100) Subject: [3.12] gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII chars... X-Git-Tag: v3.12.3~146 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=85c32ad9fd3780d8cc68a692c396cc840c093d69;p=thirdparty%2FPython%2Fcpython.git [3.12] gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset (GH-116125) (GH-116364) (cherry picked from commit f97f25ef5dfcdfec0d9a359fd970abd139cf3428) Co-authored-by: Serhiy Storchaka --- diff --git a/Lib/email/generator.py b/Lib/email/generator.py index 7ccbe10eb768..c8056ad47baa 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -243,7 +243,7 @@ class Generator: # existing message. msg = deepcopy(msg) del msg['content-transfer-encoding'] - msg.set_payload(payload, charset) + msg.set_payload(msg._payload, charset) payload = msg.get_payload() self._munge_cte = (msg['content-transfer-encoding'], msg['content-type']) diff --git a/Lib/email/message.py b/Lib/email/message.py index fe769580fed5..a14cca56b374 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -340,7 +340,7 @@ class Message: return if not isinstance(charset, Charset): charset = Charset(charset) - payload = payload.encode(charset.output_charset) + payload = payload.encode(charset.output_charset, 'surrogateescape') if hasattr(payload, 'decode'): self._payload = payload.decode('ascii', 'surrogateescape') else: diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 2a237095b908..a373c53c7c79 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -336,6 +336,21 @@ class TestMessageAPI(TestEmailBase): msg = email.message_from_bytes(source) self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_with_ascii_charset(self): + m = textwrap.dedent("""\ + MIME-Version: 1.0 + Content-type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 8bit + + Test if non-ascii messages with no Content-Transfer-Encoding set + can be as_string'd: + Föö bär + """) + source = m.encode('iso-8859-1') + expected = source.decode('ascii', 'replace') + msg = email.message_from_bytes(source) + self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_without_content_type_and_cte(self): m = textwrap.dedent("""\ MIME-Version: 1.0 diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst new file mode 100644 index 000000000000..da62f8a24507 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst @@ -0,0 +1,4 @@ +Fix UnicodeEncodeError in :meth:`email.Message.as_string` that results when +a message that claims to be in the ascii character set actually has non-ascii +characters. Non-ascii characters are now replaced with the U+FFFD replacement +character, like in the ``replace`` error handler.