git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.11] gh-80222: Fix email address header folding with long quoted-string (GH-122753...
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Wed, 19 Feb 2025 13:12:34 +0000 (14:12 +0100)
committer: GitHub <noreply@github.com>
Wed, 19 Feb 2025 13:12:34 +0000 (14:12 +0100)
Email generators using email.policy.default could incorrectly omit the
quote ('"') characters from a quoted-string during header refolding,
leading to invalid address headers and enabling header spoofing. This
change restores the quote characters on a bare-quoted-string as the
header is refolded, and escapes backslash and quote chars in the string.
(cherry picked from commit 5aaf41685834901e4ed0a40f4c055b92991a0bb5)

Co-authored-by: Mike Edmunds <medmunds@gmail.com>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Lib/email/_header_value_parser.py
Lib/test/test_email/test__header_value_parser.py
Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst [new file with mode: 0644]

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 992394ea9fff95169a80a359c09196077c098dd1..045a01bcf1e0d70ca075bb1bb42767e8fd55b6c7 100644 (file)
@@ -95,8 +95,16 @@ EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
 NLSET = {'\n', '\r'}
 SPECIALSNL = SPECIALS | NLSET
 
+
+def make_quoted_pairs(value):
+    """Escape dquote and backslash for use within a quoted-string."""
+    return str(value).replace('\\', '\\\\').replace('"', '\\"')
+
+
 def quote_string(value):
-    return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+    escaped = make_quoted_pairs(value)
+    return f'"{escaped}"'
+
 
 # Match a RFC 2047 word, looks like =?utf-8?q?someword?=
 rfc2047_matcher = re.compile(r'''
@@ -2866,6 +2874,15 @@ def _refold_parse_tree(parse_tree, *, policy):
         if not hasattr(part, 'encode'):
             # It's not a terminal, try folding the subparts.
             newparts = list(part)
+            if part.token_type == 'bare-quoted-string':
+                # To fold a quoted string we need to create a list of terminal
+                # tokens that will render the leading and trailing quotes
+                # and use quoted pairs in the value as appropriate.
+                newparts = (
+                    [ValueTerminal('"', 'ptext')] +
+                    [ValueTerminal(make_quoted_pairs(p), 'ptext')
+                     for p in newparts] +
+                    [ValueTerminal('"', 'ptext')])
             if not part.as_ew_allowed:
                 wrap_as_ew_blocked += 1
                 newparts.append(end_ew_not_allowed)
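diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 0d9343478ce7f3719463d821aabc4f598016d727..cd6495490e3d553cc482e63f1d1d4d2ef4568bff 100644 (file)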
@@ -2991,13 +2991,40 @@ class TestFolding(TestEmailBase):
         self._test(parser.get_address_list(to)[0],
             f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>\n')
 
-        a = '.' * 79
+        a = '.' * 79  # ('.' is a special, so must be in quoted-string.)
         to = f'"{a}" <xyz@example.com>, "Hübsch Kaktus" <beautiful@example.com>'
         self._test(parser.get_address_list(to)[0],
-            f'{a}\n'
+            f'"{a}"\n'
             ' <xyz@example.com>, =?utf-8?q?H=C3=BCbsch?= Kaktus '
             '<beautiful@example.com>\n')
 
+    def test_address_list_with_specials_in_long_quoted_string(self):
+        # Regression for gh-80222.
+        policy = self.policy.clone(max_line_length=40)
+        cases = [
+            # (to, folded)
+            ('"Exfiltrator <spy@example.org> (unclosed comment?" <to@example.com>',
+             '"Exfiltrator <spy@example.org> (unclosed\n'
+             ' comment?" <to@example.com>\n'),
+            ('"Escaped \\" chars \\\\ in quoted-string stay escaped" <to@example.com>',
+             '"Escaped \\" chars \\\\ in quoted-string\n'
+             ' stay escaped" <to@example.com>\n'),
+            ('This long display name does not need quotes <to@example.com>',
+             'This long display name does not need\n'
+             ' quotes <to@example.com>\n'),
+            ('"Quotes are not required but are retained here" <to@example.com>',
+             '"Quotes are not required but are\n'
+             ' retained here" <to@example.com>\n'),
+            ('"A quoted-string, it can be a valid local-part"@example.com',
+             '"A quoted-string, it can be a valid\n'
+             ' local-part"@example.com\n'),
+            ('"local-part-with-specials@but-no-fws.cannot-fold"@example.com',
+             '"local-part-with-specials@but-no-fws.cannot-fold"@example.com\n'),
+        ]
+        for (to, folded) in cases:
+            with self.subTest(to=to):
+                self._test(parser.get_address_list(to)[0], folded, policy=policy)
+
     # XXX Need tests with comments on various sides of a unicode token,
     # and with unicode tokens in the comments.  Spaces inside the quotes
     # currently don't do the right thing.
diff --git a/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst
new file mode 100644 (file)
index 0000000..0f0661d
--- /dev/null
@@ -0,0 +1,6 @@
+Fix bug in the folding of quoted strings when flattening an email message using
+a modern email policy. Previously when a quoted string was folded so that
+it spanned more than one line, the surrounding quotes and internal escapes
+would be omitted. This could theoretically be used to spoof header lines
+using a carefully constructed quoted string if the resulting rendered email
+was transmitted or re-parsed.