git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.12] gh-98188: Fix EmailMessage.get_payload to decode data when CTE value has extra...
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 7 Jan 2025 17:44:56 +0000 (18:44 +0100)
committer: GitHub <noreply@github.com>
Tue, 7 Jan 2025 17:44:56 +0000 (12:44 -0500)
gh-98188: Fix EmailMessage.get_payload to decode data when CTE value has extra text (GH-127547)

Up to this point message handling has been very strict with regard to Content-Transfer-Encoding values: mixed case was accepted, but trailing blanks or other text would cause decoding failure, even if the first token was a valid encoding.  By Postel's Rule we should go ahead and decode as long as we can recognize that first token.  We have not thought of any security or backward compatibility concerns with this fix.

This fix does introduce a new technique/pattern to the Message code: we look to see if the header has a 'cte' attribute, and if so we use that.  This effectively promotes the header API exposed by HeaderRegistry to an API that any header parser "should" support.  This seems like a reasonable thing to do.  It is not, however, a requirement, as the string value of the header is still used if there is no cte attribute.

The full fix (ignore any trailing blanks or blank-separated trailing text) applies only to the non-compat32 API.  compat32 is only fixed to the extent that it now ignores trailing spaces.  Note that the HeaderRegistry parsing still records a HeaderDefect if there is extra text.

(cherry picked from commit a62ba52f1439c1f878a3ff9b8544caf9aeef9b90)

Co-authored-by: RanKKI <hliu86.me@gmail.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Lib/email/message.py
Lib/test/test_email/test_email.py
Lib/test/test_email/test_headerregistry.py
Misc/ACKS
Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst [new file with mode: 0644]

index 46bb8c21942af85ed4dada333ab6b34e6c86f3e2..6b7c3a2377765a2b6748384f384313eac7357b73 100644 (file)
@@ -286,8 +286,12 @@ class Message:
         if i is not None and not isinstance(self._payload, list):
             raise TypeError('Expected list, got %s' % type(self._payload))
         payload = self._payload
-        # cte might be a Header, so for now stringify it.
-        cte = str(self.get('content-transfer-encoding', '')).lower()
+        cte = self.get('content-transfer-encoding', '')
+        if hasattr(cte, 'cte'):
+            cte = cte.cte
+        else:
+            # cte might be a Header, so for now stringify it.
+            cte = str(cte).strip().lower()
         # payload may be bytes here.
         if not decode:
             if isinstance(payload, str) and utils._has_surrogates(payload):
index ef8aa0d53c59ac290c141ba514efc1e9d69858c5..e33946de350ec0210b27fe1a3779a807ae803940 100644 (file)
@@ -810,6 +810,16 @@ class TestMessageAPI(TestEmailBase):
             w4kgdGVzdGFiYwo=
             """))
 
+    def test_string_payload_with_base64_cte(self):
+        msg = email.message_from_string(textwrap.dedent("""\
+        Content-Transfer-Encoding: base64
+
+        SGVsbG8uIFRlc3Rpbmc=
+        """), policy=email.policy.default)
+        self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
+        self.assertDefectsEqual(msg['content-transfer-encoding'].defects, [])
+
+
 
 # Test the email.encoders module
 class TestEncoders(unittest.TestCase):
@@ -2352,6 +2362,40 @@ counter to RFC 2822, there's no separating newline here
         self.assertDefectsEqual(msg.defects,
                                 [errors.MissingHeaderBodySeparatorDefect])
 
+    def test_string_payload_with_extra_space_after_cte(self):
+        # https://github.com/python/cpython/issues/98188
+        cte = "base64 "
+        msg = email.message_from_string(textwrap.dedent(f"""\
+        Content-Transfer-Encoding: {cte}
+
+        SGVsbG8uIFRlc3Rpbmc=
+        """), policy=email.policy.default)
+        self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
+        self.assertDefectsEqual(msg['content-transfer-encoding'].defects, [])
+
+    def test_string_payload_with_extra_text_after_cte(self):
+        msg = email.message_from_string(textwrap.dedent("""\
+        Content-Transfer-Encoding: base64 some text
+
+        SGVsbG8uIFRlc3Rpbmc=
+        """), policy=email.policy.default)
+        self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
+        cte = msg['content-transfer-encoding']
+        self.assertDefectsEqual(cte.defects, [email.errors.InvalidHeaderDefect])
+
+    def test_string_payload_with_extra_space_after_cte_compat32(self):
+        cte = "base64 "
+        msg = email.message_from_string(textwrap.dedent(f"""\
+        Content-Transfer-Encoding: {cte}
+
+        SGVsbG8uIFRlc3Rpbmc=
+        """), policy=email.policy.compat32)
+        pasted_cte = msg['content-transfer-encoding']
+        self.assertEqual(pasted_cte, cte)
+        self.assertEqual(msg.get_payload(decode=True), b"Hello. Testing")
+        self.assertDefectsEqual(msg.defects, [])
+
+
 
 # Test RFC 2047 header encoding and decoding
 class TestRFC2047(TestEmailBase):
index 5a608a033c7e5412797140b6af4a4a6c54c7b61d..a579f9eeb7f4bf3ac98c1187759132ca3b0409a5 100644 (file)
@@ -838,6 +838,11 @@ class TestContentTransferEncoding(TestHeaderBase):
             '7bit',
             [errors.InvalidHeaderDefect]),
 
+        'extra_space_after_cte': (
+            'base64 ',
+            'base64',
+            []),
+
     }
 
 
index 0f8b79f26fbf6b04780d6537ad1c2f470923c3d2..14450325647590cbd3bd001e4599af89fd2150d2 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1112,6 +1112,7 @@ Gregor Lingl
 Everett Lipman
 Mirko Liss
 Alexander Liu
+Hui Liu
 Yuan Liu
 Nick Lockwood
 Stephanie Lockwood
diff --git a/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst b/Misc/NEWS.d/next/Library/2024-12-03-14-45-16.gh-issue-98188.GX9i2b.rst
new file mode 100644 (file)
index 0000000..30ab8cf
--- /dev/null
@@ -0,0 +1,3 @@
+Fix an issue in :meth:`email.message.Message.get_payload` where data
+cannot be decoded if the Content Transfer Encoding mechanism contains
+trailing whitespace or additional junk text. Patch by Hui Liu.