git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.15] gh-150771: Fix email serialization for shift_jis and euc-jp (GH-151120) (GH...
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 16 Jun 2026 12:18:56 +0000 (14:18 +0200)
committer GitHub <noreply@github.com>
Tue, 16 Jun 2026 12:18:56 +0000 (12:18 +0000)
Encode the payload with output_charset instead of input_charset.
(cherry picked from commit 0777a58d8012bbdd0d72654b56f9112686ae6ff0)

Co-authored-by: dev <b.chouksey27@gmail.com>
Lib/email/contentmanager.py
Lib/test/test_email/test_contentmanager.py
Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst [new file with mode: 0644]

index faf2626bccce6519cd4354160c1802af37833da0..c0090af716575d73060e9bc299e1dd31192b5e85 100644 (file)
@@ -174,7 +174,8 @@ def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
                      params=None, headers=None):
     _prepare_set(msg, 'text', subtype, headers)
 
-    charset = email.charset.Charset(charset).input_charset
+    cs = email.charset.Charset(charset)
+    charset = cs.output_charset
     cte, payload = _encode_text(string, charset, cte, msg.policy)
     msg.set_payload(payload)
     msg.set_param('charset', charset, replace=True)
index 0b1b6e89f8c99229106242e30f756476c9562b38..3115941f87031944e81c7cac8a5ad1c358554175 100644 (file)
@@ -362,6 +362,46 @@ class TestRawDataManager(TestEmailBase):
         self.assertEqual(m.get_payload(decode=True), content.encode('ks_c_5601-1987'))
         self.assertEqual(m.get_content(), content)
 
+    def test_set_text_charset_shift_jis(self):
+        m = self._make_message()
+        content = "\u65e5\u672c\u8a9e\n"
+        raw_data_manager.set_content(m, content, charset='shift_jis')
+        self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
+        self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
+        self.assertEqual(m.get_content(), content)
+        self.assertEqual(str(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \x1b$BF|K\\8l\x1b(B
+            """))
+        self.assertEqual(bytes(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \u65e5\u672c\u8a9e
+            """).encode('iso-2022-jp'))
+
+    def test_set_text_charset_euc_jp(self):
+        m = self._make_message()
+        content = "\u65e5\u672c\u8a9e\n"
+        raw_data_manager.set_content(m, content, charset='euc-jp')
+        self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
+        self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
+        self.assertEqual(m.get_content(), content)
+        self.assertEqual(str(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \x1b$BF|K\\8l\x1b(B
+            """))
+        self.assertEqual(bytes(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \u65e5\u672c\u8a9e
+            """).encode('iso-2022-jp'))
+
     def test_set_text_plain_long_line_heuristics(self):
         m = self._make_message()
         content = ("Simple but long message that is over 78 characters"
diff --git a/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
new file mode 100644 (file)
index 0000000..6535e5c
--- /dev/null
@@ -0,0 +1,4 @@
+Fix :mod:`email` messages created with ``shift_jis`` or ``euc-jp`` charsets.
+``set_content()`` now stores the payload using the output charset
+(``iso-2022-jp``) so printing the message no longer raises
+:exc:`UnicodeEncodeError`.