[3.11] gh-80222: Fix email address header folding with long quoted-string (GH-122753...

author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>

Wed, 19 Feb 2025 13:12:34 +0000 (14:12 +0100)

committer GitHub <noreply@github.com>

Wed, 19 Feb 2025 13:12:34 +0000 (14:12 +0100)
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Wed, 19 Feb 2025 13:12:34 +0000 (14:12 +0100)
committer GitHub <noreply@github.com>
Wed, 19 Feb 2025 13:12:34 +0000 (14:12 +0100)
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py

index 992394ea9fff95169a80a359c09196077c098dd1..045a01bcf1e0d70ca075bb1bb42767e8fd55b6c7 100644 (file)
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -95,8 +95,16 @@ EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
  NLSET = {'\n', '\r'}
  SPECIALSNL = SPECIALS | NLSET
  
+
+def make_quoted_pairs(value):
+    """Escape dquote and backslash for use within a quoted-string."""
+    return str(value).replace('\\', '\\\\').replace('"', '\\"')
+
+
  def quote_string(value):
-    return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+    escaped = make_quoted_pairs(value)
+    return f'"{escaped}"'
+
  
  # Match a RFC 2047 word, looks like =?utf-8?q?someword?=
  rfc2047_matcher = re.compile(r'''
@@ -2866,6 +2874,15 @@ def _refold_parse_tree(parse_tree, *, policy):
          if not hasattr(part, 'encode'):
              # It's not a terminal, try folding the subparts.
              newparts = list(part)
+            if part.token_type == 'bare-quoted-string':
+                # To fold a quoted string we need to create a list of terminal
+                # tokens that will render the leading and trailing quotes
+                # and use quoted pairs in the value as appropriate.
+                newparts = (
+                    [ValueTerminal('"', 'ptext')] +
+                    [ValueTerminal(make_quoted_pairs(p), 'ptext')
+                     for p in newparts] +
+                    [ValueTerminal('"', 'ptext')])
              if not part.as_ew_allowed:
                  wrap_as_ew_blocked += 1
                  newparts.append(end_ew_not_allowed)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py

index 0d9343478ce7f3719463d821aabc4f598016d727..cd6495490e3d553cc482e63f1d1d4d2ef4568bff 100644 (file)
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -2991,13 +2991,40 @@ class TestFolding(TestEmailBase):
          self._test(parser.get_address_list(to)[0],
              f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>\n')
  
-        a = '.' * 79
+        a = '.' * 79  # ('.' is a special, so must be in quoted-string.)
          to = f'"{a}" <xyz@example.com>, "Hübsch Kaktus" <beautiful@example.com>'
          self._test(parser.get_address_list(to)[0],
-            f'{a}\n'
+            f'"{a}"\n'
              ' <xyz@example.com>, =?utf-8?q?H=C3=BCbsch?= Kaktus '
              '<beautiful@example.com>\n')
  
+    def test_address_list_with_specials_in_long_quoted_string(self):
+        # Regression for gh-80222.
+        policy = self.policy.clone(max_line_length=40)
+        cases = [
+            # (to, folded)
+            ('"Exfiltrator <spy@example.org> (unclosed comment?" <to@example.com>',
+             '"Exfiltrator <spy@example.org> (unclosed\n'
+             ' comment?" <to@example.com>\n'),
+            ('"Escaped \\" chars \\\\ in quoted-string stay escaped" <to@example.com>',
+             '"Escaped \\" chars \\\\ in quoted-string\n'
+             ' stay escaped" <to@example.com>\n'),
+            ('This long display name does not need quotes <to@example.com>',
+             'This long display name does not need\n'
+             ' quotes <to@example.com>\n'),
+            ('"Quotes are not required but are retained here" <to@example.com>',
+             '"Quotes are not required but are\n'
+             ' retained here" <to@example.com>\n'),
+            ('"A quoted-string, it can be a valid local-part"@example.com',
+             '"A quoted-string, it can be a valid\n'
+             ' local-part"@example.com\n'),
+            ('"local-part-with-specials@but-no-fws.cannot-fold"@example.com',
+             '"local-part-with-specials@but-no-fws.cannot-fold"@example.com\n'),
+        ]
+        for (to, folded) in cases:
+            with self.subTest(to=to):
+                self._test(parser.get_address_list(to)[0], folded, policy=policy)
+
      # XXX Need tests with comments on various sides of a unicode token,
      # and with unicode tokens in the comments.  Spaces inside the quotes
      # currently don't do the right thing.
diff --git a/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst

new file mode 100644 (file)

index 0000000..0f0661d
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
@@ -0,0 +1,6 @@
+Fix a bug in the folding of quoted strings when flattening an email message
+using a modern email policy. Previously, when a quoted string was folded so
+that it spanned more than one line, the surrounding quotes and internal
+escapes were omitted. This could theoretically be used to spoof header lines
+using a carefully constructed quoted string if the resulting rendered email
+was transmitted or re-parsed.
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
	Wed, 19 Feb 2025 13:12:34 +0000 (14:12 +0100)
committer	GitHub <noreply@github.com>
	Wed, 19 Feb 2025 13:12:34 +0000 (14:12 +0100)
Lib/email/_header_value_parser.py		patch \| blob \| blame \| history
Lib/test/test_email/test__header_value_parser.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst	[new file with mode: 0644]	patch \| blob