git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-39040: Fix parsing of email MIME headers with whitespace between encoded-words...
author: Abhilash Raj <maxking@users.noreply.github.com>
Fri, 29 May 2020 00:04:59 +0000 (17:04 -0700)
committer: GitHub <noreply@github.com>
Fri, 29 May 2020 00:04:59 +0000 (20:04 -0400)
* bpo-39040: Fix parsing of email headers with encoded-words inside a quoted string.

It is fairly common to find malformed MIME headers (especially Content-Disposition
headers) where the parameter values, instead of being encoded to RFC
standards, are "encoded" by applying RFC 2047 "encoded word" encoding and
then enclosing the whole thing in quotes.  The processing of these malformed
headers incorrectly left the whitespace between encoded words in the decoded
text (whitespace between adjacent encoded words is supposed to be stripped on
decoding).  This changeset fixes the encoded-word processing inside quoted strings
(bare-quoted-string) so that it performs correct RFC 2047 decoding by stripping
that whitespace.

Lib/email/_header_value_parser.py
Lib/test/test_email/test_headerregistry.py
Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst [new file with mode: 0644]

index 9c55ef7fb453bee0b1069e019c4dbd482f1bc6da..51d355fbb0abc5460835414998983717256e9364 100644 (file)
@@ -1218,12 +1218,21 @@ def get_bare_quoted_string(value):
         if value[0] in WSP:
             token, value = get_fws(value)
         elif value[:2] == '=?':
+            valid_ew = False
             try:
                 token, value = get_encoded_word(value)
                 bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
                     "encoded word inside quoted string"))
+                valid_ew = True
             except errors.HeaderParseError:
                 token, value = get_qcontent(value)
+            # Collapse the whitespace between two encoded words that occur in a
+            # bare-quoted-string.
+            if valid_ew and len(bare_quoted_string) > 1:
+                if (bare_quoted_string[-1].token_type == 'fws' and
+                        bare_quoted_string[-2].token_type == 'encoded-word'):
+                    bare_quoted_string[-1] = EWWhiteSpaceTerminal(
+                        bare_quoted_string[-1], 'fws')
         else:
             token, value = get_qcontent(value)
         bare_quoted_string.append(token)
index 82e121350ffbf5f3d165457c9ee7fb05a1206b32..68bbc9561c4aff1f238b362df0f2ef1a1a30952e 100644 (file)
@@ -873,6 +873,25 @@ class TestContentDisposition(TestHeaderBase):
             {'filename': 'foo'},
             [errors.InvalidHeaderDefect]),
 
+        'invalid_parameter_value_with_fws_between_ew': (
+            'attachment; filename="=?UTF-8?Q?Schulbesuchsbest=C3=A4ttigung=2E?='
+            '               =?UTF-8?Q?pdf?="',
+            'attachment',
+            {'filename': 'Schulbesuchsbestättigung.pdf'},
+            [errors.InvalidHeaderDefect]*3,
+            ('attachment; filename="Schulbesuchsbestättigung.pdf"'),
+            ('Content-Disposition: attachment;\n'
+             ' filename*=utf-8\'\'Schulbesuchsbest%C3%A4ttigung.pdf\n'),
+            ),
+
+        'parameter_value_with_fws_between_tokens': (
+            'attachment; filename="File =?utf-8?q?Name?= With Spaces.pdf"',
+            'attachment',
+            {'filename': 'File Name With Spaces.pdf'},
+            [errors.InvalidHeaderDefect],
+            'attachment; filename="File Name With Spaces.pdf"',
+            ('Content-Disposition: attachment; filename="File Name With Spaces.pdf"\n'),
+            )
     }
 
 
diff --git a/Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst b/Misc/NEWS.d/next/Library/2019-12-15-18-47-20.bpo-39040.tKa0Qs.rst
new file mode 100644 (file)
index 0000000..078bce2
--- /dev/null
@@ -0,0 +1,2 @@
+Fix parsing of invalid MIME header parameters by collapsing whitespace between
+encoded words in a bare-quoted-string.