From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Tue, 5 Mar 2024 17:32:30 +0000 (+0100) Subject: [3.11] gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII chars... X-Git-Tag: v3.11.9~97 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c75df4b13a1ec6317699142b29380bc500f52dcf;p=thirdparty%2FPython%2Fcpython.git [3.11] gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset (GH-116125) (GH-116365) (cherry picked from commit f97f25ef5dfcdfec0d9a359fd970abd139cf3428) Co-authored-by: Serhiy Storchaka --- diff --git a/Lib/email/generator.py b/Lib/email/generator.py index b8c10917a5d9..eb597de76d42 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -243,7 +243,7 @@ class Generator: # existing message. msg = deepcopy(msg) del msg['content-transfer-encoding'] - msg.set_payload(payload, charset) + msg.set_payload(msg._payload, charset) payload = msg.get_payload() self._munge_cte = (msg['content-transfer-encoding'], msg['content-type']) diff --git a/Lib/email/message.py b/Lib/email/message.py index 4e9536b8563e..492a6b9a4309 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -340,7 +340,7 @@ class Message: return if not isinstance(charset, Charset): charset = Charset(charset) - payload = payload.encode(charset.output_charset) + payload = payload.encode(charset.output_charset, 'surrogateescape') if hasattr(payload, 'decode'): self._payload = payload.decode('ascii', 'surrogateescape') else: diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 677f2094b835..785696e5c541 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -337,6 +337,21 @@ class TestMessageAPI(TestEmailBase): msg = email.message_from_bytes(source) self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_with_ascii_charset(self): + m = textwrap.dedent("""\ + MIME-Version: 1.0 + Content-type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 8bit + + Test if non-ascii messages with no Content-Transfer-Encoding set + can be as_string'd: + Föö bär + """) + source = m.encode('iso-8859-1') + expected = source.decode('ascii', 'replace') + msg = email.message_from_bytes(source) + self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_without_content_type_and_cte(self): m = textwrap.dedent("""\ MIME-Version: 1.0 diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst new file mode 100644 index 000000000000..da62f8a24507 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst @@ -0,0 +1,4 @@ +Fix UnicodeEncodeError in :meth:`email.Message.as_string` that results when +a message that claims to be in the ascii character set actually has non-ascii +characters. Non-ascii characters are now replaced with the U+FFFD replacement +character, like in the ``replace`` error handler.