[3.12] gh-124452: Fix header mismatches when folding/unfolding with email message...

author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>

Sun, 17 Nov 2024 20:12:29 +0000 (21:12 +0100)

committer GitHub <noreply@github.com>

Sun, 17 Nov 2024 20:12:29 +0000 (15:12 -0500)
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Sun, 17 Nov 2024 20:12:29 +0000 (21:12 +0100)
committer GitHub <noreply@github.com>
Sun, 17 Nov 2024 20:12:29 +0000 (15:12 -0500)
diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py

index 5f9aa9fb091fa271695794a4aa6b04de42713387..c9f0d743090e549f704f3d57b640f61f37f56024 100644 (file)
--- a/Lib/email/_policybase.py
+++ b/Lib/email/_policybase.py
@@ -302,12 +302,12 @@ class Compat32(Policy):
          """+
          The name is parsed as everything up to the ':' and returned unmodified.
          The value is determined by stripping leading whitespace off the
-        remainder of the first line, joining all subsequent lines together, and
+        remainder of the first line joined with all subsequent lines, and
          stripping any trailing carriage return or linefeed characters.
  
          """
          name, value = sourcelines[0].split(':', 1)
-        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+        value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n')
          return (name, value.rstrip('\r\n'))
  
      def header_store_parse(self, name, value):
diff --git a/Lib/email/policy.py b/Lib/email/policy.py

index 46b7de5bb6d8ae873a57b9821bcef88ba1ec9c81..6e109b65011a4407b3fca613e1e385001f6a8ac0 100644 (file)
--- a/Lib/email/policy.py
+++ b/Lib/email/policy.py
@@ -119,13 +119,13 @@ class EmailPolicy(Policy):
          """+
          The name is parsed as everything up to the ':' and returned unmodified.
          The value is determined by stripping leading whitespace off the
-        remainder of the first line, joining all subsequent lines together, and
+        remainder of the first line joined with all subsequent lines, and
          stripping any trailing carriage return or linefeed characters.  (This
          is the same as Compat32).
  
          """
          name, value = sourcelines[0].split(':', 1)
-        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+        value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n')
          return (name, value.rstrip('\r\n'))
  
      def header_store_parse(self, name, value):
diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py

index 034f7626c1fc7c05c1c3b56601fa50c3e13c7a17..96979db27f3a21b681df87aa7b727912b0f8fcc8 100644 (file)
--- a/Lib/test/test_email/test_message.py
+++ b/Lib/test/test_email/test_message.py
@@ -1,6 +1,6 @@
-import unittest
  import textwrap
-from email import policy, message_from_string
+import unittest
+from email import message_from_bytes, message_from_string, policy
  from email.message import EmailMessage, MIMEPart
  from test.test_email import TestEmailBase, parameterize
  
@@ -958,6 +958,52 @@ class TestEmailMessage(TestEmailMessageBase, TestEmailBase):
                           b'123456789-123456789\n 123456789 Hello '
                           b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n')
  
+    def test_folding_with_short_nospace_1(self):
+        # bpo-36520
+        #
+        # Fold a line that contains a long whitespace after
+        # the fold point.
+
+        m = EmailMessage(policy.default)
+        m['Message-ID'] = '123456789' * 3
+        parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
+        self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
+
+    def test_folding_with_long_nospace_default_policy_1(self):
+        # Fixed: https://github.com/python/cpython/issues/124452
+        #
+        # When the value is too long, it should be converted back
+        # to its original form without any modifications.
+
+        m = EmailMessage(policy.default)
+        message = '123456789' * 10
+        m['Message-ID'] = message
+        self.assertEqual(m.as_bytes(),
+                         f'Message-ID:\n {message}\n\n'.encode())
+        parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
+        self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
+
+    def test_folding_with_long_nospace_compat32_policy_1(self):
+        m = EmailMessage(policy.compat32)
+        message = '123456789' * 10
+        m['Message-ID'] = message
+        parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
+        self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
+
+    def test_folding_with_long_nospace_smtp_policy_1(self):
+        m = EmailMessage(policy.SMTP)
+        message = '123456789' * 10
+        m['Message-ID'] = message
+        parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
+        self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
+
+    def test_folding_with_long_nospace_http_policy_1(self):
+        m = EmailMessage(policy.HTTP)
+        message = '123456789' * 10
+        m['Message-ID'] = message
+        parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
+        self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
+
      def test_get_body_malformed(self):
          """test for bpo-42892"""
          msg = textwrap.dedent("""\
diff --git a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst

new file mode 100644 (file)

index 0000000..b0d6379
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst
@@ -0,0 +1,4 @@
+Fix an issue in :meth:`email.policy.EmailPolicy.header_source_parse` and
+:meth:`email.policy.Compat32.header_source_parse` that introduced spurious
+leading whitespace into header values when the header includes a newline
+character after the header name delimiter (``:``) and before the value.
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
	Sun, 17 Nov 2024 20:12:29 +0000 (21:12 +0100)
committer	GitHub <noreply@github.com>
	Sun, 17 Nov 2024 20:12:29 +0000 (15:12 -0500)
Lib/email/_policybase.py		patch | blob | blame | history
Lib/email/policy.py		patch | blob | blame | history
Lib/test/test_email/test_message.py		patch | blob | blame | history
Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst	[new file with mode: 0644]	patch | blob